morph.mx raw

   1  package iskra
   2  
   3  import "git.smesh.lol/iskradb/lattice"
   4  
   5  // MorphState is a 5-bit field in Record.DataFile bits 1-5.
   6  //
   7  // Bit layout (wu xing mapping):
   8  //   bit 4 (earth, 16): tense       0=present  1=past
   9  //   bit 3 (wood,   8): aspect      0=simple   1=progressive
  10  //   bit 2 (metal,  4): polarity    0=affirm   1=negative
  11  //   bit 1 (water,  2): formality   0=plain    1=polite
  12  //   bit 0 (fire,   1): evidential  0=direct   1=reported
  13  //
  14  // State 0 = present simple affirmative plain direct (dictionary/base form).
  15  const (
  16  	MorphPresAffPlain   uint8 = 0  // 00000
  17  	MorphPresAffPolite  uint8 = 2  // 00010
  18  	MorphPresNegPlain   uint8 = 4  // 00100
  19  	MorphPresNegPolite  uint8 = 6  // 00110
  20  	MorphPresProgPlain  uint8 = 8  // 01000
  21  	MorphPresProgPolite uint8 = 10 // 01010
  22  	MorphPresProgNeg    uint8 = 12 // 01100
  23  	MorphPastAffPlain   uint8 = 16 // 10000
  24  	MorphPastAffPolite  uint8 = 18 // 10010
  25  	MorphPastNegPlain   uint8 = 20 // 10100
  26  	MorphPastNegPolite  uint8 = 22 // 10110
  27  	MorphPastProgPlain  uint8 = 24 // 11000
  28  	MorphPastProgPolite uint8 = 26 // 11010
  29  	MorphPastProgNeg    uint8 = 28 // 11100
  30  	MorphPresReported   uint8 = 1  // 00001
  31  	MorphPastReported   uint8 = 17 // 10001
  32  
  33  	// Language activation masks.
  34  	MaskJA uint8 = 0b11111 // all 5 bits
  35  	MaskEN uint8 = 0b11100 // tense + aspect + polarity only
  36  	MaskBG uint8 = 0b11111 // all 5 bits
  37  )
  38  
  39  // Register values (Record.Branch bits 3-4).
  40  const (
  41  	RegNeutral  uint8 = 0
  42  	RegFormal   uint8 = 1
  43  	RegInformal uint8 = 2
  44  	RegVulgar   uint8 = 3
  45  )
  46  
  47  // Domain values (Record.Branch bits 5-6).
  48  const (
  49  	DomGeneral   uint8 = 0
  50  	DomTechnical uint8 = 1
  51  	DomMedical   uint8 = 2
  52  	DomLegal     uint8 = 3
  53  )
  54  
  55  // Special values (Record.Branch bit 7).
  56  const (
  57  	SpecNormal    uint8 = 0
  58  	SpecHonorific uint8 = 1
  59  	SpecArchaic         = SpecNormal
  60  	SpecHumble          = RegFormal
  61  )
  62  
  63  // Filter constants for register-based translation filtering.
  64  const (
  65  	FilterNone    uint8 = 0
  66  	FilterFormal  uint8 = 1
  67  	FilterNeutral uint8 = 2
  68  	FilterCasual  uint8 = 3
  69  )
  70  
  71  // SetMorphState packs a 5-bit morph state into Record.DataFile bits 1-5.
  72  func SetMorphState(rec *lattice.Record, state uint8) {
  73  	rec.DataFile = (rec.DataFile & 0xFFFFFFC1) | (uint32(state&0x1F) << 1)
  74  }
  75  
  76  // GetMorphState extracts the 5-bit morph state from Record.DataFile bits 1-5.
  77  func GetMorphState(rec *lattice.Record) uint8 {
  78  	return uint8((rec.DataFile >> 1) & 0x1F)
  79  }
  80  
  81  const semanticDataFileShift = 6
  82  
  83  // SetSemanticInDataFile packs 16-bit semantic flags into Record.DataFile bits 6-21.
  84  func SetSemanticInDataFile(rec *lattice.Record, flags uint64) {
  85  	rec.DataFile = (rec.DataFile &^ (0xFFFF << semanticDataFileShift)) |
  86  		(uint32(flags&0xFFFF) << semanticDataFileShift)
  87  }
  88  
  89  // GetSemanticFromDataFile extracts the 16-bit semantic flags from Record.DataFile bits 6-21.
  90  func GetSemanticFromDataFile(rec *lattice.Record) uint64 {
  91  	return uint64((rec.DataFile >> semanticDataFileShift) & 0xFFFF)
  92  }
  93  
  94  // PackBranch encodes POS (3 bits), register, domain, and honorific into a Branch byte.
  95  func PackBranch(pos uint8, reg, dom, spec uint8) uint8 {
  96  	return (pos & 0x07) | ((reg & 0x03) << 3) | ((dom & 0x03) << 5) | ((spec & 0x01) << 7)
  97  }
  98  
  99  // POSFromBranch extracts the 3-bit POS (branch index 0-7) from a packed Branch byte.
 100  func POSFromBranch(b uint8) uint8 { return b & 0x07 }
 101  
 102  // RegFromBranch extracts the register bits (3-4).
 103  func RegFromBranch(b uint8) uint8 { return (b >> 3) & 0x03 }
 104  
 105  // DomFromBranch extracts the domain bits (5-6).
 106  func DomFromBranch(b uint8) uint8 { return (b >> 5) & 0x03 }
 107  
 108  // SpecFromBranch extracts the honorific bit (7).
 109  func SpecFromBranch(b uint8) uint8 { return (b >> 7) & 0x01 }
 110  
 111  // BranchWeirdness returns a tiebreaker score for register ranking. Lower = more preferred.
 112  func BranchWeirdness(b uint8) uint32 {
 113  	reg := uint32(RegFromBranch(b))
 114  	spec := uint32(SpecFromBranch(b))
 115  	return reg*10 + spec*5
 116  }
 117  
 118  // MatchesFilter returns true if a packed Branch byte is acceptable under the filter.
 119  func MatchesFilter(b uint8, filter uint8) bool {
 120  	reg := RegFromBranch(b)
 121  	switch filter {
 122  	case FilterNone:
 123  		return true
 124  	case FilterFormal:
 125  		return reg != RegVulgar
 126  	case FilterNeutral:
 127  		return reg == RegNeutral
 128  	case FilterCasual:
 129  		return reg != RegFormal && SpecFromBranch(b) != SpecHonorific
 130  	}
 131  	return true
 132  }
 133