ingest_pattern.mx raw

   1  package iskra
   2  
   3  import (
   4  	"math"
   5  
   6  	"git.smesh.lol/iskradb/lattice"
   7  )
   8  
// Default Gaussian σ for register-coordinate filtering in LookupAtomLink.
// σ_archaic is tighter than σ_discourse: archaism is a stronger semantic
// register mismatch than mere sentence-length difference. Tunable per call
// through LookupAtomLink's sigmaArch/sigmaDisc parameters.
const (
	DefaultSigmaArchaic   = 64.0
	DefaultSigmaDiscourse = 128.0
	// diversityNearThreshold: the Gaussian factor below which a corpus
	// coord doesn't count toward the per-DstAtom diversity bonus.
	// Coords with Gaussian < 0.05 (i.e. muted by more than 20×) are too far
	// to be considered "supporting evidence" for a translation. With the
	// default σ values above and the factor
	// exp(-(Δarch²/σ_archaic² + Δdisc²/σ_discourse²)):
	// bible (255,199) from query (0,0) gives ~1.1e-8; KFTT (5,171) from
	// (0,0) gives ~0.17 - KFTT counts as supporting evidence, bible does not.
	diversityNearThreshold = 0.05
)
  23  
  24  // IngestPattern stores atoms and patterns from an extraction result.
  25  // Returns the pattern recIdx for cross-domain linking.
  26  func IngestPattern(t *Tree, domain uint8, ext ExtractResult) uint32 {
  27  	if len(ext.Pattern) == 0 {
  28  		return lattice.NullRec
  29  	}
  30  
  31  	// 1. Upsert the pattern record (Bgrammatical branch).
  32  	patKey := PatternKey(domain, ext.Pattern)
  33  	patRI := t.LookupRecIdx(lattice.Bgrammatical, patKey)
  34  	if patRI != lattice.NullRec {
  35  		t.metaInc(patRI)
  36  	} else {
  37  		var rec lattice.Record
  38  		t.setFormOnRec(&rec, string(ext.Pattern))
  39  		rec.Branch = uint8(lattice.Bgrammatical)
  40  		patRI = t.db.InsertRec(lattice.Bgrammatical, patKey, rec)
  41  		t.metaSet(patRI, MetaEntry{Count: 1, StageTag: domain})
  42  	}
  43  
  44  	// 2. Upsert each content slot as an atom (Bsemantic branch).
  45  	// Use lemma (from Set) as the atom key when available; fall back to surface form.
  46  	for i, word := range ext.Slots {
  47  		if word == "" {
  48  			continue
  49  		}
  50  		atomForm := word
  51  		if i < len(ext.Set) && ext.Set[i].Atom != "" {
  52  			atomForm = ext.Set[i].Atom
  53  		}
  54  		atomKey := AtomKey(domain, atomForm)
  55  		atomRI := t.LookupRecIdx(lattice.Bsemantic, atomKey)
  56  		if atomRI != lattice.NullRec {
  57  			t.metaInc(atomRI)
  58  			if i < len(ext.Roles) {
  59  				m := t.metaGet(atomRI)
  60  				if m != nil {
  61  					var h RoleHist
  62  					h.Decode(m.Extra)
  63  					h[ext.Roles[i]]++
  64  					h.Encode(&m.Extra)
  65  					if t.BulkMetaStore != nil {
  66  						t.BulkMetaStore.dirty[atomRI] = true
  67  					}
  68  				}
  69  			}
  70  		} else {
  71  			var rec lattice.Record
  72  			t.setFormOnRec(&rec, atomForm)
  73  			rec.Branch = uint8(lattice.Bsemantic)
  74  			atomRI = t.db.InsertRec(lattice.Bsemantic, atomKey, rec)
  75  			m := MetaEntry{Count: 1, StageTag: domain}
  76  			if i < len(ext.Roles) {
  77  				var h RoleHist
  78  				h[ext.Roles[i]] = 1
  79  				h.Encode(&m.Extra)
  80  			}
  81  			t.metaSet(atomRI, m)
  82  		}
  83  	}
  84  
  85  	return patRI
  86  }
  87  
  88  // IngestCrossDomain records a structural alignment between two patterns.
  89  // Called when a JA sentence pattern corresponds to an EN sentence pattern.
  90  func IngestCrossDomain(t *Tree, srcDomain, dstDomain uint8, srcPat, dstPat []byte) {
  91  	if len(srcPat) == 0 || len(dstPat) == 0 {
  92  		return
  93  	}
  94  	key := CrossPatternKey(srcDomain, dstDomain, srcPat, dstPat)
  95  	ri := t.LookupRecIdx(lattice.Bcooccur, key)
  96  	if ri != lattice.NullRec {
  97  		t.metaInc(ri)
  98  		return
  99  	}
 100  	form := string(srcPat) | "=" | string(dstPat)
 101  	var rec lattice.Record
 102  	t.setFormOnRec(&rec, form)
 103  	rec.Branch = uint8(lattice.Bcooccur)
 104  	ri = t.db.InsertRec(lattice.Bcooccur, key, rec)
 105  	t.metaSet(ri, MetaEntry{Count: 1, StageTag: srcDomain})
 106  }
 107  
 108  // IngestDeepPattern stores a canonical deep pattern and increments its count.
 109  // Deep patterns are language-independent role sequences shared across domains.
 110  func IngestDeepPattern(t *Tree, deepPat []byte) {
 111  	if len(deepPat) == 0 {
 112  		return
 113  	}
 114  	key := DeepPatternKey(deepPat)
 115  	ri := t.LookupRecIdx(lattice.Bgrammatical, key)
 116  	if ri != lattice.NullRec {
 117  		t.metaInc(ri)
 118  		return
 119  	}
 120  	var rec lattice.Record
 121  	t.setFormOnRec(&rec, string(deepPat))
 122  	rec.Branch = uint8(lattice.Bgrammatical)
 123  	ri = t.db.InsertRec(lattice.Bgrammatical, key, rec)
 124  	t.metaSet(ri, MetaEntry{Count: 1, StageTag: 0}) // domain 0 = cross-domain
 125  }
 126  
// Atom-link generation marker. IngestContextedAtomLink stores it in the
// high nibble of MetaEntry.StageTag (langA | gen<<4) and AtomLinkKey mixes
// it into the record key. It distinguishes:
//   GenLegacy (0)  - records written by the bilateral IngestAtomLink before
//                    the context-aware schema landed; role/context fields
//                    are empty/unknown. Used as a translation fallback.
//   GenContexted (1) - records written by IngestContextedAtomLink with
//                      role and governing-context populated. The preferred
//                      lookup path.
//   GenDictionary (2) - dictionary entries. LookupAtomLink uses them to
//                       confirm corpus candidates (subject to dictPOSMatch)
//                       and as a last-resort tier when no corpus-derived
//                       candidate is viable.
const (
	GenLegacy    uint8 = 0
	GenContexted uint8 = 1
	GenDictionary uint8 = 2
)
 140  
 141  // pronounPerson returns the grammatical person (1, 2, 3) of a pronoun
 142  // atom, or 0 if the atom is not a known pronoun. Used to prevent
 143  // cross-person pronoun links during ingestion.
 144  func pronounPerson(lang uint8, atom string) int32 {
 145  	if lang == 1 {
 146  		switch atom {
 147  		case "i", "me", "my", "myself", "we", "us", "our", "ourselves":
 148  			return 1
 149  		case "you", "your", "yourself", "yourselves":
 150  			return 2
 151  		case "he", "him", "his", "himself",
 152  			"she", "her", "herself",
 153  			"they", "them", "their", "themselves":
 154  			return 3
 155  		case "it", "itself":
 156  			return 4 // inanimate - only links to JA demonstratives, not human pronouns
 157  		}
 158  	} else if lang == 2 {
 159  		switch atom {
 160  		case "\xe7\xa7\x81",         // 私
 161  			"\xe5\x83\x95",         // 僕
 162  			"\xe4\xbf\xba",         // 俺
 163  			"\xe3\x82\x8f\xe3\x81\x97", // わし
 164  			"\xe8\x87\xaa\xe5\x88\x86", // 自分
 165  			"\xe7\xa7\x81\xe3\x81\x9f\xe3\x81\xa1", // 私たち
 166  			"\xe6\x88\x91\xe3\x80\x85": // 我々
 167  			return 1
 168  		case "\xe3\x81\x82\xe3\x81\xaa\xe3\x81\x9f", // あなた
 169  			"\xe5\x90\x9b",         // 君
 170  			"\xe3\x81\x8a\xe5\x89\x8d", // お前
 171  			"\xe3\x81\x82\xe3\x82\x93\xe3\x81\x9f": // あんた
 172  			return 2
 173  		case "\xe5\xbd\xbc",         // 彼
 174  			"\xe5\xbd\xbc\xe5\xa5\xb3", // 彼女
 175  			"\xe5\xbd\xbc\xe3\x82\x89": // 彼ら
 176  			return 3
 177  		case "\xe3\x81\x9d\xe3\x82\x8c", // それ
 178  			"\xe3\x81\x93\xe3\x82\x8c", // これ
 179  			"\xe3\x81\x82\xe3\x82\x8c": // あれ
 180  			return 4 // inanimate demonstratives
 181  		}
 182  	}
 183  	return 0
 184  }
 185  
 186  func isSingleKana(s string) bool {
 187  	return len(s) == 3 && s[0] == 0xe3 && (s[1] == 0x81 || s[1] == 0x82 || s[1] == 0x83)
 188  }
 189  
 190  // isJunkJAAtom filters JA atoms that are lemmatizer artifacts.
 191  // っ+single-hiragana (e.g. っう, っく) are malformed godan stems
 192  // produced when the lemmatizer over-strips a verb.
 193  func isJunkJAAtom(s string) bool {
 194  	if len(s) == 6 && s[0] == 0xe3 && s[1] == 0x81 && s[2] == 0xa3 &&
 195  		s[3] == 0xe3 && s[4] == 0x81 {
 196  		return true
 197  	}
 198  	return false
 199  }
 200  
 201  // AtomLinkKey constructs the lattice key for a context-aware atom-link
 202  // record. Composite of (langA, langB, "X", roleA, gen, rArch, rDisc,
 203  // atomA \0 contextA \0 atomB) hashed via SipHash.
 204  //
 205  // Register coordinate (rArch, rDisc) is in the key so the same atom pair
 206  // from different-register corpora produces distinct records. This keeps
 207  // scripture-derived associations from polluting modern-conversational
 208  // lookups even when the atoms collide.
 209  //
 210  // Note: this is a point-lookup key. Prefix-scan queries are served by a
 211  // sidecar index, not by key structure.
 212  func AtomLinkKey(langA, langB, roleA, gen, rArch, rDisc uint8, atomA, contextA, atomB string) lattice.Key {
 213  	n := 7 + len(atomA) + 1 + len(contextA) + 1 + len(atomB)
 214  	buf := []byte{:n:n}
 215  	buf[0] = langA
 216  	buf[1] = langB
 217  	buf[2] = 'X'
 218  	buf[3] = roleA
 219  	buf[4] = gen
 220  	buf[5] = rArch
 221  	buf[6] = rDisc
 222  	off := 7
 223  	copy(buf[off:], []byte(atomA))
 224  	off += len(atomA)
 225  	buf[off] = 0x00
 226  	off++
 227  	copy(buf[off:], []byte(contextA))
 228  	off += len(contextA)
 229  	buf[off] = 0x00
 230  	off++
 231  	copy(buf[off:], []byte(atomB))
 232  	return lattice.HashKey(buf)
 233  }
 234  
 235  // IngestContextedAtomLink records a word-level cross-language link with
 236  // role and governing-context tagging. ContextA is an atom from the same
 237  // language as atomA (the immediate head when Head>=0, the clause's
 238  // predicate atom when Head==-1, or empty string for the predicate itself).
 239  // Same for contextB.
 240  //
 241  // Generation marker distinguishes legacy lossy-migrated records from
 242  // proper context-aware records; the lookup function prefers GenContexted
 243  // matches and falls back to GenLegacy.
 244  func IngestContextedAtomLink(t *Tree,
 245  	langA, langB uint8,
 246  	atomA, contextA string, roleA int32,
 247  	atomB, contextB string, roleB int32,
 248  	rArch, rDisc uint8,
 249  ) {
 250  	// Lemmatize per language at ingest time so inflected forms collapse.
 251  	if langA == 1 {
 252  		atomA = LemmatizeEN(atomA).Lemma
 253  	} else if langA == 2 {
 254  		atomA = LemmatizeJA(atomA, roleA == HistVerb).Lemma
 255  	}
 256  	if langB == 1 {
 257  		atomB = LemmatizeEN(atomB).Lemma
 258  	} else if langB == 2 {
 259  		atomB = LemmatizeJA(atomB, roleB == HistVerb).Lemma
 260  	}
 261  	if atomA == "" || atomB == "" {
 262  		return
 263  	}
 264  	if isSingleKana(atomA) || isSingleKana(atomB) {
 265  		return
 266  	}
 267  	if isJunkJAAtom(atomA) || isJunkJAAtom(atomB) {
 268  		return
 269  	}
 270  	// Person-concordance filter: don't link 1st-person pronouns to
 271  	// 2nd/3rd-person pronouns across languages. JA restructures
 272  	// predication (EN "I love you" -> JA "君が好きだ") so role-based
 273  	// alignment creates false cross-person pronoun links.
 274  	pA := pronounPerson(langA, atomA)
 275  	pB := pronounPerson(langB, atomB)
 276  	if pA > 0 && pB > 0 && pA != pB {
 277  		return
 278  	}
 279  	// Pronouns only link to pronouns. Prevents structural misalignment
 280  	// where EN "you" (SUBJECT) links to JA 物 (SUBJECT) because JA
 281  	// restructured the predication.
 282  	if pA > 0 && pB == 0 {
 283  		return
 284  	}
 285  	if pB > 0 && pA == 0 {
 286  		return
 287  	}
 288  
 289  	key := AtomLinkKey(langA, langB, uint8(roleA), GenContexted, rArch, rDisc,
 290  		atomA, contextA, atomB)
 291  	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
 292  	if ri != lattice.NullRec {
 293  		t.metaInc(ri)
 294  		return
 295  	}
 296  	form := atomA | "|" | contextA | "|" | atomB | "|" | contextB
 297  	var rec lattice.Record
 298  	t.setFormOnRec(&rec, form)
 299  	rec.Branch = uint8(lattice.Bpragmatic)
 300  	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
 301  	stageTag := langA | (GenContexted << 4)
 302  	t.metaSet(ri, MetaEntry{Count: 1, StageTag: stageTag})
 303  	// Extra layout for GenContexted records:
 304  	//   Extra[0]: roleB
 305  	//   Extra[1]: langB
 306  	//   Extra[2]: R_archaic (corpus register coordinate)
 307  	//   Extra[3]: R_discourse
 308  	//   Extra[4]: roleA
 309  	m := t.metaGet(ri)
 310  	if m != nil {
 311  		m.Extra[0] = uint8(roleB)
 312  		m.Extra[1] = langB
 313  		m.Extra[2] = rArch
 314  		m.Extra[3] = rDisc
 315  		m.Extra[4] = uint8(roleA)
 316  		if t.BulkMetaStore != nil {
 317  			t.BulkMetaStore.dirty[ri] = true
 318  		}
 319  	}
 320  }
 321  
// AtomLinkResult is the return type of LookupAtomLink. Carries the
// destination atom and provenance information for diagnostic visibility.
// The zero value means "no match".
type AtomLinkResult struct {
	// DstAtom, DstRole, DstContext and Weight are copied from the picked
	// index entry (DstAtom, RoleB, ContextB, Weight).
	DstAtom    string
	DstRole    int32
	DstContext string
	Weight     uint32
	Generation uint8 // Gen of the picked entry: GenLegacy (0), GenContexted (1) or GenDictionary (2)
	Tier       uint8 // 1-5: relaxation tier that produced the pick (5 = dictionary last resort); 0 = no match
}
 332  
 333  // LookupAtomLink finds the best destination atom for (srcLang, srcAtom)
 334  // in dstLang via the sidecar index. Each candidate is scored by:
 335  //
 336  //   score = log(1 + weight) × diversity_near × exp(-distance²/σ²)
 337  //
 338  // Three components:
 339  //
 340  //  1. log(1 + weight) - logarithmic in observation count. Compresses the
 341  //     differentiation between high-frequency records so a Tatoeba-
 342  //     memorized wrong mapping with weight=50 (score ~3.93) doesn't
 343  //     outvote a less-frequent correct one with weight=5 (score ~1.79)
 344  //     by orders of magnitude. Bayesian intuition: the 50 observations
 345  //     from one corpus are correlated, not independent; their information
 346  //     content scales sub-linearly.
 347  //
 348  //  2. diversity_near - count of distinct corpus register-coordinates
 349  //     among candidates with the same DstAtom, FILTERED to coords whose
 350  //     Gaussian factor is above the diversityNearThreshold. A DstAtom
 351  //     backed by 3 corpora near the query has diversity_near=3; one
 352  //     backed by Bible-only (far from a modern query) has near=0,
 353  //     defaulting to 1. This prevents far-register records from padding
 354  //     the diversity of an irrelevant DstAtom.
 355  //
 356  //  3. Gaussian distance - per-record register-axis filter. Records far
 357  //     from the query coord get muted. Documented in the register coord
 358  //     design.
 359  //
 360  // Net effect: corpus diversity outweighs raw count when the diversity
 361  // is in-register. A Tatoeba-only correct mapping at weight=20 scores
 362  // log(21)*1 = 3.04. A multi-corpus correct mapping at weight=5 each
 363  // across 3 near corpora scores log(6)*3 = 5.38. The diverse one wins.
 364  // A Tatoeba+Bible "diverse" but in-register-singular wrong mapping
 365  // scores log(weight)*1 because Bible's coord is filtered out.
 366  //
// Tier order (within tier, highest score wins):
//   Tier 1: GenContexted match with exact (srcContext, srcRole)
//   Tier 2: GenContexted match with same srcRole, any context
//   Tier 3: GenContexted match with any role, any context
//   Tier 4: GenLegacy bilateral fallback
//   Tier 5: GenDictionary entries, used only when tiers 1-4 yield nothing
 372  //
// If sigmaArch or sigmaDisc is <= 0 the Gaussian factor is omitted
// (treated as 1).

// IngestStats tracks diagnostic counters for the trilateral scoring pipeline.
type IngestStats struct {
	// TriFired: lookups that reached the triangulation step, i.e. had at
	// least one viable candidate.
	TriFired          int32
	// TriConfirmed: lookups where triangulation confirmed at least one of
	// the candidates it checked.
	TriConfirmed      int32
	// TriSwapped: lookups where the second viable candidate was confirmed
	// and the first was not, so the second received the bonus.
	TriSwapped        int32
	// TriRescued: lookups whose single viable candidate was confirmed.
	TriRescued        int32
	// CtxSimFired: candidates whose ContextB matched the translated
	// source context.
	CtxSimFired       int32
	// CtxSimBoosted is not updated by LookupAtomLink.
	CtxSimBoosted     int32
	// DictConfirmFired: candidates confirmed by a POS-compatible
	// dictionary entry.
	DictConfirmFired  int32
	// DictAuthorityFired: lookups in which at least one corpus-tier viable
	// candidate was dictionary-confirmed (counted once per lookup).
	DictAuthorityFired int32
}
 385  
 386  // dictPOSMatch returns true when a dictionary entry's POS-derived role
 387  // (dictRole) is compatible with the query atom's contextual role (queryRole).
 388  // POS-role mapping from dict-ingest: verb->3, adj/adv->4, noun/name->1, else->7.
 389  func dictPOSMatch(dictRole, queryRole int32) bool {
 390  	switch dictRole {
 391  	case HistVerb:
 392  		return queryRole == HistVerb
 393  	case HistModifier:
 394  		return queryRole == HistModifier || queryRole == HistScope
 395  	case HistSubject:
 396  		return queryRole == HistSubject || queryRole == HistObject ||
 397  			queryRole == HistTopic || queryRole == HistComplement ||
 398  			queryRole == HistScope
 399  	case HistComplement:
 400  		return true
 401  	}
 402  	return true
 403  }
 404  
 405  func LookupAtomLink(idx *AtomIdx, srcLang, dstLang uint8,
 406  	srcAtom, srcContext string, srcRole int32,
 407  	qArch, qDisc uint8, sigmaArch, sigmaDisc float64,
 408  	stats *IngestStats,
 409  ) AtomLinkResult {
 410  	if idx == nil {
 411  		return AtomLinkResult{}
 412  	}
 413  	candidates := idx.FindBySrc(srcLang, srcAtom)
 414  	if len(candidates) == 0 {
 415  		return AtomLinkResult{}
 416  	}
 417  
 418  	gauss := func(rArch, rDisc uint8) float64 {
 419  		if sigmaArch <= 0 || sigmaDisc <= 0 {
 420  			return 1.0
 421  		}
 422  		da := float64(int32(rArch) - int32(qArch))
 423  		dd := float64(int32(rDisc) - int32(qDisc))
 424  		exponent := (da*da)/(sigmaArch*sigmaArch) + (dd*dd)/(sigmaDisc*sigmaDisc)
 425  		return math.Exp(-exponent)
 426  	}
 427  
 428  	// First pass: per-DstAtom diversity_near. Count distinct corpus
 429  	// coords whose Gaussian factor is above diversityNearThreshold.
 430  	// Coords beyond the threshold are too register-distant to count as
 431  	// supporting evidence for a translation.
 432  	type coordSet map[uint16]bool
 433  	diversity := map[string]coordSet{}
 434  	for i := range candidates {
 435  		e := &candidates[i]
 436  		if e.DstLang != dstLang {
 437  			continue
 438  		}
 439  		if gauss(e.RArchaic, e.RDiscourse) < diversityNearThreshold {
 440  			continue
 441  		}
 442  		coord := uint16(e.RArchaic)<<8 | uint16(e.RDiscourse)
 443  		s := diversity[e.DstAtom]
 444  		if s == nil {
 445  			s = coordSet{}
 446  			diversity[e.DstAtom] = s
 447  		}
 448  		s[coord] = true
 449  	}
 450  
 451  	baseScore := func(e *AtomIdxEntry) float64 {
 452  		w := math.Log1p(float64(e.Weight))
 453  		div := float64(len(diversity[e.DstAtom]))
 454  		if div < 1 {
 455  			div = 1
 456  		}
 457  		return w * div * gauss(e.RArchaic, e.RDiscourse)
 458  	}
 459  
 460  	// Context-similarity: translate srcContext to dstLang once.
 461  	// When a tier-2 candidate's ContextB matches, the candidate was
 462  	// observed with the same governing head (translated) as the query -
 463  	// strong polysemy disambiguation signal.
 464  	ctxTranslation := ""
 465  	if srcContext != "" {
 466  		ctxCands := idx.FindBySrc(srcLang, srcContext)
 467  		var bestCtxW float64
 468  		for j := range ctxCands {
 469  			cc := &ctxCands[j]
 470  			if cc.DstLang != dstLang {
 471  				continue
 472  			}
 473  			w := float64(cc.Weight) * gauss(cc.RArchaic, cc.RDiscourse)
 474  			if w > bestCtxW {
 475  				bestCtxW = w
 476  				ctxTranslation = cc.DstAtom
 477  			}
 478  		}
 479  	}
 480  
 481  	// Bilateral consistency: only check the top candidate per tier to
 482  	// avoid O(K*M) backward lookups on polysemous atoms.
 483  	biCheck := func(dstAtom string) float64 {
 484  		backCands := idx.FindBySrc(dstLang, dstAtom)
 485  		var best, src float64
 486  		for j := range backCands {
 487  			bc := &backCands[j]
 488  			if bc.DstLang != srcLang {
 489  				continue
 490  			}
 491  			w := float64(bc.Weight)
 492  			if w > best {
 493  				best = w
 494  			}
 495  			if bc.DstAtom == srcAtom && w > src {
 496  				src = w
 497  			}
 498  		}
 499  		if best <= 0 {
 500  			return 1.0
 501  		}
 502  		return (src + 1) / (best + 1)
 503  	}
 504  
 505  	// Per-DstAtom aggregation: sum baseScore across all entries for the
 506  	// same destination atom within each tier. Polysemous atoms observed
 507  	// in many contexts accumulate evidence.
 508  	type atomAgg struct {
 509  		bestEntry *AtomIdxEntry
 510  		aggScore  float64
 511  		tier      int32
 512  	}
 513  	tierAtoms := map[string]*atomAgg{}
 514  
 515  	for i := range candidates {
 516  		e := &candidates[i]
 517  		if e.DstLang != dstLang {
 518  			continue
 519  		}
 520  		s := baseScore(e)
 521  		ti := -1
 522  		if e.Gen == GenContexted {
 523  			if e.ContextA == srcContext && int32(e.RoleA) == srcRole {
 524  				ti = 0
 525  			} else if int32(e.RoleA) == srcRole {
 526  				ti = 1
 527  			} else {
 528  				ti = 2
 529  			}
 530  		} else if e.Gen == GenLegacy {
 531  			ti = 3
 532  		} else if e.Gen == GenDictionary {
 533  			ti = 4
 534  		}
 535  		if ti < 0 {
 536  			continue
 537  		}
 538  		key := string([]byte{byte(ti), ':'}) | e.DstAtom
 539  		a := tierAtoms[key]
 540  		if a == nil {
 541  			a = &atomAgg{tier: ti}
 542  			tierAtoms[key] = a
 543  		}
 544  		a.aggScore += s
 545  		if a.bestEntry == nil || s > baseScore(a.bestEntry) {
 546  			a.bestEntry = e
 547  		}
 548  	}
 549  
 550  	// Collect top-N per tier for bilateral scoring.
 551  	const topN = 8
 552  	type ranked struct {
 553  		entry    *AtomIdxEntry
 554  		aggScore float64
 555  		dstAtom  string
 556  	}
 557  	var top [5][topN]ranked
 558  	for _, a := range tierAtoms {
 559  		ti := a.tier
 560  		s := a.aggScore
 561  		slot := -1
 562  		for k := 0; k < topN; k++ {
 563  			if top[ti][k].entry == nil {
 564  				slot = k
 565  				break
 566  			}
 567  			if s > top[ti][k].aggScore {
 568  				slot = k
 569  				break
 570  			}
 571  		}
 572  		if slot < 0 {
 573  			continue
 574  		}
 575  		for k := topN - 1; k > slot; k-- {
 576  			top[ti][k] = top[ti][k-1]
 577  		}
 578  		top[ti][slot] = ranked{entry: a.bestEntry, aggScore: s, dstAtom: a.bestEntry.DstAtom}
 579  	}
 580  
 581  	// triConfirm checks whether srcAtom->dstAtom is confirmed by a
 582  	// 2-hop path through an intermediate language.
 583  	intermediateLangs := [2]uint8{0x03, 0x04} // KO, ZH
 584  	triConfirm := func(dstAtom string) int32 {
 585  		confirms := 0
 586  		for _, mid := range intermediateLangs {
 587  			if mid == srcLang || mid == dstLang {
 588  				continue
 589  			}
 590  			srcMid := topAtomVia(idx, srcLang, mid, srcAtom)
 591  			if srcMid == "" {
 592  				continue
 593  			}
 594  			dstMid := topAtomVia(idx, dstLang, mid, dstAtom)
 595  			if dstMid == "" {
 596  				continue
 597  			}
 598  			if srcMid == dstMid {
 599  				confirms++
 600  			}
 601  		}
 602  		return confirms
 603  	}
 604  
 605  	ctxMatch := func(e *AtomIdxEntry) bool {
 606  		return ctxTranslation != "" && e.ContextB == ctxTranslation
 607  	}
 608  
 609  	dictConfirm := func(dstAtom string) bool {
 610  		for i := range candidates {
 611  			e := &candidates[i]
 612  			if e.Gen != GenDictionary || e.DstLang != dstLang {
 613  				continue
 614  			}
 615  			if !dictPOSMatch(int32(e.RoleA), srcRole) {
 616  				continue
 617  			}
 618  			if e.DstAtom == dstAtom {
 619  				return true
 620  			}
 621  			// Fuzzy: corpus "search" matches dict "search for",
 622  			// or corpus "carry out" matches dict "carry".
 623  			if dstLang == 1 {
 624  				da := e.DstAtom
 625  				if len(dstAtom) < len(da) && da[:len(dstAtom)] == dstAtom && da[len(dstAtom)] == ' ' {
 626  					return true
 627  				}
 628  				if len(da) < len(dstAtom) && dstAtom[:len(da)] == da && dstAtom[len(da)] == ' ' {
 629  					return true
 630  				}
 631  			}
 632  		}
 633  		return false
 634  	}
 635  
 636  	// Combined scoring: bilateral ratio modulates aggregate score.
 637  	// ctx-sim, triangulation, and dictionary confirmation are bonuses.
 638  	type scored struct {
 639  		entry    *AtomIdxEntry
 640  		combined float64
 641  		tier     int32
 642  		dictOK   bool
 643  	}
 644  	var viable []scored
 645  
 646  	for ti := 0; ti < 4; ti++ {
 647  		topAgg := 0.0
 648  		if top[ti][0].entry != nil {
 649  			topAgg = top[ti][0].aggScore
 650  		}
 651  		for k := 0; k < topN; k++ {
 652  			r := &top[ti][k]
 653  			if r.entry == nil {
 654  				continue
 655  			}
 656  			if topAgg > 0 && r.aggScore < topAgg*0.1 {
 657  				continue
 658  			}
 659  			bi := biCheck(r.dstAtom)
 660  			combined := r.aggScore * (bi + 0.05)
 661  			if ctxMatch(r.entry) {
 662  				combined *= 1.5
 663  				stats.CtxSimFired++
 664  			}
 665  			dc := dictConfirm(r.dstAtom)
 666  			if dc {
 667  				combined *= 1.6
 668  				stats.DictConfirmFired++
 669  			}
 670  			viable = append(viable, scored{entry: r.entry, combined: combined, tier: ti, dictOK: dc})
 671  		}
 672  		if len(viable) > 0 {
 673  			break
 674  		}
 675  	}
 676  	// Dictionary authority counter: track when dict-confirmed candidates
 677  	// exist in the viable set (for diagnostics).
 678  	for _, v := range viable {
 679  		if v.dictOK {
 680  			stats.DictAuthorityFired++
 681  			break
 682  		}
 683  	}
 684  	// Tier-4 (dictionary) only as last resort when corpus tiers empty.
 685  	// Dict entries are pre-validated translations, so use a biRatio floor
 686  	// to prevent polysemous back-indexes from over-penalizing common words.
 687  	if len(viable) == 0 {
 688  		for k := 0; k < topN; k++ {
 689  			r := &top[4][k]
 690  			if r.entry == nil {
 691  				continue
 692  			}
 693  			bi := biCheck(r.dstAtom)
 694  			if bi < 0.25 {
 695  				bi = 0.25
 696  			}
 697  			combined := r.aggScore * (bi + 0.05)
 698  			dc := dictConfirm(r.dstAtom)
 699  			if dc {
 700  				combined *= 1.6
 701  				stats.DictConfirmFired++
 702  			}
 703  			viable = append(viable, scored{entry: r.entry, combined: combined, tier: 4, dictOK: dc})
 704  		}
 705  	}
 706  
 707  	// Triangulation bonus on top-2 viable candidates.
 708  	if len(viable) >= 2 {
 709  		stats.TriFired++
 710  		tc0 := triConfirm(viable[0].entry.DstAtom)
 711  		tc1 := triConfirm(viable[1].entry.DstAtom)
 712  		if tc0 > 0 || tc1 > 0 {
 713  			stats.TriConfirmed++
 714  		}
 715  		if tc1 > 0 && tc0 == 0 {
 716  			viable[1].combined *= 1.3
 717  			stats.TriSwapped++
 718  		} else if tc0 > 0 && tc1 == 0 {
 719  			viable[0].combined *= 1.3
 720  		}
 721  	} else if len(viable) == 1 {
 722  		stats.TriFired++
 723  		tc := triConfirm(viable[0].entry.DstAtom)
 724  		if tc > 0 {
 725  			stats.TriConfirmed++
 726  			stats.TriRescued++
 727  		}
 728  	}
 729  
 730  	// Pick highest combined score.
 731  	var pick *AtomIdxEntry
 732  	tier := uint8(0)
 733  	bestCombined := 0.0
 734  	for _, v := range viable {
 735  		if v.combined > bestCombined {
 736  			bestCombined = v.combined
 737  			pick = v.entry
 738  			tier = uint8(v.tier + 1)
 739  		}
 740  	}
 741  	if pick == nil {
 742  		return AtomLinkResult{}
 743  	}
 744  	return AtomLinkResult{
 745  		DstAtom:    pick.DstAtom,
 746  		DstRole:    int32(pick.RoleB),
 747  		DstContext: pick.ContextB,
 748  		Weight:     pick.Weight,
 749  		Generation: pick.Gen,
 750  		Tier:       tier,
 751  	}
 752  }
 753  
// DiagCandidate holds scoring details for one candidate in the ranked list
// returned by LookupAtomLinkDiag.
type DiagCandidate struct {
	// DstAtom is the candidate destination atom.
	DstAtom  string
	// AggScore is the base score summed over all entries for DstAtom
	// within the candidate's tier.
	AggScore float64
	// BiRatio is the bilateral (back-translation) consistency ratio.
	BiRatio  float64
	// Combined is AggScore * (BiRatio + 0.05) with the context-similarity
	// and dictionary-confirmation multipliers applied. NOTE(review): the
	// snapshot appears to be taken before the triangulation bonus - confirm.
	Combined float64
	// CtxSim is true when the candidate's ContextB matched the translated
	// source context.
	CtxSim   bool
	// Tier is the zero-based tier index (0-4), unlike AtomLinkResult.Tier,
	// which LookupAtomLink reports one-based.
	Tier     int32
}
 763  
 764  // LookupAtomLinkDiag is LookupAtomLink with full candidate diagnostics.
 765  func LookupAtomLinkDiag(idx *AtomIdx, srcLang, dstLang uint8,
 766  	srcAtom, srcContext string, srcRole int32,
 767  	qArch, qDisc uint8, sigmaArch, sigmaDisc float64,
 768  ) (AtomLinkResult, []DiagCandidate) {
 769  	if idx == nil {
 770  		return AtomLinkResult{}, nil
 771  	}
 772  	candidates := idx.FindBySrc(srcLang, srcAtom)
 773  	if len(candidates) == 0 {
 774  		return AtomLinkResult{}, nil
 775  	}
 776  
 777  	gauss := func(rArch, rDisc uint8) float64 {
 778  		if sigmaArch <= 0 || sigmaDisc <= 0 {
 779  			return 1.0
 780  		}
 781  		da := float64(int32(rArch) - int32(qArch))
 782  		dd := float64(int32(rDisc) - int32(qDisc))
 783  		exponent := (da*da)/(sigmaArch*sigmaArch) + (dd*dd)/(sigmaDisc*sigmaDisc)
 784  		return math.Exp(-exponent)
 785  	}
 786  
 787  	type coordSet map[uint16]bool
 788  	diversity := map[string]coordSet{}
 789  	for i := range candidates {
 790  		e := &candidates[i]
 791  		if e.DstLang != dstLang {
 792  			continue
 793  		}
 794  		if gauss(e.RArchaic, e.RDiscourse) < diversityNearThreshold {
 795  			continue
 796  		}
 797  		coord := uint16(e.RArchaic)<<8 | uint16(e.RDiscourse)
 798  		s := diversity[e.DstAtom]
 799  		if s == nil {
 800  			s = coordSet{}
 801  			diversity[e.DstAtom] = s
 802  		}
 803  		s[coord] = true
 804  	}
 805  
 806  	baseScore := func(e *AtomIdxEntry) float64 {
 807  		w := math.Log1p(float64(e.Weight))
 808  		div := float64(len(diversity[e.DstAtom]))
 809  		if div < 1 {
 810  			div = 1
 811  		}
 812  		return w * div * gauss(e.RArchaic, e.RDiscourse)
 813  	}
 814  
 815  	ctxTranslation := ""
 816  	if srcContext != "" {
 817  		ctxCands := idx.FindBySrc(srcLang, srcContext)
 818  		var bestCtxW float64
 819  		for j := range ctxCands {
 820  			cc := &ctxCands[j]
 821  			if cc.DstLang != dstLang {
 822  				continue
 823  			}
 824  			w := float64(cc.Weight) * gauss(cc.RArchaic, cc.RDiscourse)
 825  			if w > bestCtxW {
 826  				bestCtxW = w
 827  				ctxTranslation = cc.DstAtom
 828  			}
 829  		}
 830  	}
 831  
 832  	biCheck := func(dstAtom string) float64 {
 833  		backCands := idx.FindBySrc(dstLang, dstAtom)
 834  		var best, src float64
 835  		for j := range backCands {
 836  			bc := &backCands[j]
 837  			if bc.DstLang != srcLang {
 838  				continue
 839  			}
 840  			w := float64(bc.Weight)
 841  			if w > best {
 842  				best = w
 843  			}
 844  			if bc.DstAtom == srcAtom && w > src {
 845  				src = w
 846  			}
 847  		}
 848  		if best <= 0 {
 849  			return 1.0
 850  		}
 851  		return (src + 1) / (best + 1)
 852  	}
 853  
 854  	type atomAgg struct {
 855  		bestEntry *AtomIdxEntry
 856  		aggScore  float64
 857  		tier      int32
 858  	}
 859  	tierAtoms := map[string]*atomAgg{}
 860  	for i := range candidates {
 861  		e := &candidates[i]
 862  		if e.DstLang != dstLang {
 863  			continue
 864  		}
 865  		s := baseScore(e)
 866  		ti := -1
 867  		if e.Gen == GenContexted {
 868  			if e.ContextA == srcContext && int32(e.RoleA) == srcRole {
 869  				ti = 0
 870  			} else if int32(e.RoleA) == srcRole {
 871  				ti = 1
 872  			} else {
 873  				ti = 2
 874  			}
 875  		} else if e.Gen == GenLegacy {
 876  			ti = 3
 877  		} else if e.Gen == GenDictionary {
 878  			ti = 4
 879  		}
 880  		if ti < 0 {
 881  			continue
 882  		}
 883  		key := string([]byte{byte(ti), ':'}) | e.DstAtom
 884  		a := tierAtoms[key]
 885  		if a == nil {
 886  			a = &atomAgg{tier: ti}
 887  			tierAtoms[key] = a
 888  		}
 889  		a.aggScore += s
 890  		if a.bestEntry == nil || s > baseScore(a.bestEntry) {
 891  			a.bestEntry = e
 892  		}
 893  	}
 894  
 895  	type ranked struct {
 896  		entry    *AtomIdxEntry
 897  		aggScore float64
 898  		dstAtom  string
 899  	}
 900  	var top [5][4]ranked
 901  	for _, a := range tierAtoms {
 902  		ti := a.tier
 903  		s := a.aggScore
 904  		slot := -1
 905  		for k := 0; k < 4; k++ {
 906  			if top[ti][k].entry == nil {
 907  				slot = k
 908  				break
 909  			}
 910  			if s > top[ti][k].aggScore {
 911  				slot = k
 912  				break
 913  			}
 914  		}
 915  		if slot < 0 {
 916  			continue
 917  		}
 918  		for k := 3; k > slot; k-- {
 919  			top[ti][k] = top[ti][k-1]
 920  		}
 921  		top[ti][slot] = ranked{entry: a.bestEntry, aggScore: s, dstAtom: a.bestEntry.DstAtom}
 922  	}
 923  
 924  	ctxMatch := func(e *AtomIdxEntry) bool {
 925  		return ctxTranslation != "" && e.ContextB == ctxTranslation
 926  	}
 927  
 928  	diagDictConfirm := func(dstAtom string) bool {
 929  		for i := range candidates {
 930  			e := &candidates[i]
 931  			if e.Gen != GenDictionary || e.DstLang != dstLang {
 932  				continue
 933  			}
 934  			if !dictPOSMatch(int32(e.RoleA), srcRole) {
 935  				continue
 936  			}
 937  			if e.DstAtom == dstAtom {
 938  				return true
 939  			}
 940  			if dstLang == 1 {
 941  				da := e.DstAtom
 942  				if len(dstAtom) < len(da) && da[:len(dstAtom)] == dstAtom && da[len(dstAtom)] == ' ' {
 943  					return true
 944  				}
 945  				if len(da) < len(dstAtom) && dstAtom[:len(da)] == da && dstAtom[len(da)] == ' ' {
 946  					return true
 947  				}
 948  			}
 949  		}
 950  		return false
 951  	}
 952  
 953  	type scoredD struct {
 954  		entry    *AtomIdxEntry
 955  		combined float64
 956  		aggScore float64
 957  		biRatio  float64
 958  		ctxSim   bool
 959  		tier     int32
 960  	}
 961  	var viable []scoredD
 962  
 963  	for ti := 0; ti < 4; ti++ {
 964  		for k := 0; k < 4; k++ {
 965  			r := &top[ti][k]
 966  			if r.entry == nil {
 967  				continue
 968  			}
 969  			bi := biCheck(r.dstAtom)
 970  			combined := r.aggScore * (bi + 0.05)
 971  			cm := ctxMatch(r.entry)
 972  			if cm {
 973  				combined *= 1.5
 974  			}
 975  			if diagDictConfirm(r.dstAtom) {
 976  				combined *= 1.6
 977  			}
 978  			viable = append(viable, scoredD{
 979  				entry: r.entry, combined: combined,
 980  				aggScore: r.aggScore, biRatio: bi, ctxSim: cm, tier: ti,
 981  			})
 982  		}
 983  		if len(viable) > 0 {
 984  			break
 985  		}
 986  	}
 987  	if len(viable) == 0 {
 988  		for k := 0; k < 4; k++ {
 989  			r := &top[4][k]
 990  			if r.entry == nil {
 991  				continue
 992  			}
 993  			bi := biCheck(r.dstAtom)
 994  			combined := r.aggScore * (bi + 0.05)
 995  			viable = append(viable, scoredD{
 996  				entry: r.entry, combined: combined,
 997  				aggScore: r.aggScore, biRatio: bi, ctxSim: false, tier: 4,
 998  			})
 999  		}
1000  	}
1001  
1002  	var diag []DiagCandidate
1003  	for _, v := range viable {
1004  		diag = append(diag, DiagCandidate{
1005  			DstAtom:  v.entry.DstAtom,
1006  			AggScore: v.aggScore,
1007  			BiRatio:  v.biRatio,
1008  			Combined: v.combined,
1009  			CtxSim:   v.ctxSim,
1010  			Tier:     v.tier,
1011  		})
1012  	}
1013  
1014  	if len(viable) >= 2 {
1015  		tc0 := triConfirmStatic(idx, srcLang, dstLang, srcAtom, viable[0].entry.DstAtom)
1016  		tc1 := triConfirmStatic(idx, srcLang, dstLang, srcAtom, viable[1].entry.DstAtom)
1017  		if tc1 > 0 && tc0 == 0 {
1018  			viable[1].combined *= 1.3
1019  		} else if tc0 > 0 && tc1 == 0 {
1020  			viable[0].combined *= 1.3
1021  		}
1022  	}
1023  
1024  	var pick *AtomIdxEntry
1025  	tier := uint8(0)
1026  	bestCombined := 0.0
1027  	for _, v := range viable {
1028  		if v.combined > bestCombined {
1029  			bestCombined = v.combined
1030  			pick = v.entry
1031  			tier = uint8(v.tier + 1)
1032  		}
1033  	}
1034  
1035  	if pick == nil {
1036  		return AtomLinkResult{}, diag
1037  	}
1038  	return AtomLinkResult{
1039  		DstAtom:    pick.DstAtom,
1040  		DstRole:    int32(pick.RoleB),
1041  		DstContext: pick.ContextB,
1042  		Weight:     pick.Weight,
1043  		Generation: pick.Gen,
1044  		Tier:       tier,
1045  	}, diag
1046  }
1047  
1048  // triConfirmStatic is a non-counter-incrementing version for diagnostics.
1049  func triConfirmStatic(idx *AtomIdx, srcLang, dstLang uint8, srcAtom, dstAtom string) int32 {
1050  	intermediateLangs := [2]uint8{0x03, 0x04}
1051  	confirms := 0
1052  	for _, mid := range intermediateLangs {
1053  		if mid == srcLang || mid == dstLang {
1054  			continue
1055  		}
1056  		srcMid := topAtomVia(idx, srcLang, mid, srcAtom)
1057  		if srcMid == "" {
1058  			continue
1059  		}
1060  		dstMid := topAtomVia(idx, dstLang, mid, dstAtom)
1061  		if dstMid == "" {
1062  			continue
1063  		}
1064  		if srcMid == dstMid {
1065  			confirms++
1066  		}
1067  	}
1068  	return confirms
1069  }
1070  
1071  // topAtomVia returns the highest-weight DstAtom for srcLang->dstLang
1072  // without full scoring. Used by triangulation to get a quick "what does
1073  // this atom translate to via language M?" answer.
1074  func topAtomVia(idx *AtomIdx, srcLang, dstLang uint8, srcAtom string) string {
1075  	cands := idx.FindBySrc(srcLang, srcAtom)
1076  	var bestAtom string
1077  	var bestScore float64
1078  	for i := range cands {
1079  		e := &cands[i]
1080  		if e.DstLang != dstLang {
1081  			continue
1082  		}
1083  		da := float64(e.RArchaic)
1084  		dd := float64(e.RDiscourse)
1085  		g := math.Exp(-(da*da)/(DefaultSigmaArchaic*DefaultSigmaArchaic) - (dd*dd)/(DefaultSigmaDiscourse*DefaultSigmaDiscourse))
1086  		s := float64(e.Weight) * g
1087  		if s > bestScore {
1088  			bestScore = s
1089  			bestAtom = e.DstAtom
1090  		}
1091  	}
1092  	return bestAtom
1093  }
1094  
1095  // IngestAtomLink records a word-level cross-domain correspondence.
1096  // Words are lemmatized before storage so inflected forms collapse to region centers.
1097  // srcRole/dstRole hint whether the word is a verb (needed for JA lemmatization).
1098  //
1099  // This is the legacy bilateral function (GenLegacy generation). It remains
1100  // in place for backward compatibility and as the lookup fallback for atoms
1101  // that have no GenContexted records yet. New ingest paths should call
1102  // IngestContextedAtomLink instead.
1103  func IngestAtomLink(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32, rArch, rDisc uint8) {
1104  	srcAtom := srcWord
1105  	dstAtom := dstWord
1106  	if srcDomain == 1 {
1107  		srcAtom = LemmatizeEN(srcWord).Lemma
1108  	} else if srcDomain == 2 {
1109  		srcAtom = LemmatizeJA(srcWord, srcRole == HistVerb).Lemma
1110  	}
1111  	if dstDomain == 1 {
1112  		dstAtom = LemmatizeEN(dstWord).Lemma
1113  	} else if dstDomain == 2 {
1114  		dstAtom = LemmatizeJA(dstWord, dstRole == HistVerb).Lemma
1115  	}
1116  	if isSingleKana(srcAtom) || isSingleKana(dstAtom) {
1117  		return
1118  	}
1119  	if isJunkJAAtom(srcAtom) || isJunkJAAtom(dstAtom) {
1120  		return
1121  	}
1122  
1123  	buf := []byte{:5 + len(srcAtom) + len(dstAtom):5 + len(srcAtom) + len(dstAtom)}
1124  	buf[0] = srcDomain
1125  	buf[1] = dstDomain
1126  	buf[2] = 'L'
1127  	buf[3] = rArch
1128  	buf[4] = rDisc
1129  	copy(buf[5:], []byte(srcAtom))
1130  	copy(buf[5+len(srcAtom):], []byte(dstAtom))
1131  	key := lattice.HashKey(buf)
1132  
1133  	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
1134  	if ri != lattice.NullRec {
1135  		t.metaInc(ri)
1136  		return
1137  	}
1138  	form := srcAtom | "=" | dstAtom
1139  	var rec lattice.Record
1140  	t.setFormOnRec(&rec, form)
1141  	rec.Branch = uint8(lattice.Bpragmatic)
1142  	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
1143  	t.metaSet(ri, MetaEntry{Count: 1, StageTag: srcDomain})
1144  	// GenLegacy records also carry the corpus register coord so the sidecar
1145  	// reader can apply distance weighting to legacy candidates too.
1146  	m := t.metaGet(ri)
1147  	if m != nil {
1148  		m.Extra[2] = rArch
1149  		m.Extra[3] = rDisc
1150  		if t.BulkMetaStore != nil {
1151  			t.BulkMetaStore.dirty[ri] = true
1152  		}
1153  	}
1154  }
1155  
1156  func IngestDictAtomLink(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32) {
1157  	srcAtom := srcWord
1158  	dstAtom := dstWord
1159  	if srcDomain == 1 {
1160  		srcAtom = LemmatizeEN(srcWord).Lemma
1161  	} else if srcDomain == 2 {
1162  		srcAtom = LemmatizeJA(srcWord, srcRole == HistVerb).Lemma
1163  	}
1164  	if dstDomain == 1 {
1165  		dstAtom = LemmatizeEN(dstWord).Lemma
1166  	} else if dstDomain == 2 {
1167  		dstAtom = LemmatizeJA(dstWord, dstRole == HistVerb).Lemma
1168  	}
1169  	ingestDictAtomLinkInner(t, srcDomain, dstDomain, srcAtom, dstAtom, srcRole, dstRole)
1170  }
1171  
1172  func IngestDictAtomLinkRaw(t *Tree, srcDomain, dstDomain uint8, srcWord, dstWord string, srcRole, dstRole int32) {
1173  	ingestDictAtomLinkInner(t, srcDomain, dstDomain, srcWord, dstWord, srcRole, dstRole)
1174  }
1175  
1176  func ingestDictAtomLinkInner(t *Tree, srcDomain, dstDomain uint8, srcAtom, dstAtom string, srcRole, dstRole int32) {
1177  	if srcAtom == "" || dstAtom == "" {
1178  		return
1179  	}
1180  	if isSingleKana(srcAtom) || isSingleKana(dstAtom) {
1181  		return
1182  	}
1183  	if isJunkJAAtom(srcAtom) || isJunkJAAtom(dstAtom) {
1184  		return
1185  	}
1186  
1187  	buf := []byte{:5 + len(srcAtom) + len(dstAtom):5 + len(srcAtom) + len(dstAtom)}
1188  	buf[0] = srcDomain
1189  	buf[1] = dstDomain
1190  	buf[2] = 'D'
1191  	buf[3] = uint8(srcRole)
1192  	buf[4] = uint8(dstRole)
1193  	copy(buf[5:], []byte(srcAtom))
1194  	copy(buf[5+len(srcAtom):], []byte(dstAtom))
1195  	key := lattice.HashKey(buf)
1196  
1197  	ri := t.LookupRecIdx(lattice.Bpragmatic, key)
1198  	if ri != lattice.NullRec {
1199  		t.metaInc(ri)
1200  		return
1201  	}
1202  	form := srcAtom | "=" | dstAtom
1203  	var rec lattice.Record
1204  	t.setFormOnRec(&rec, form)
1205  	rec.Branch = uint8(lattice.Bpragmatic)
1206  	ri = t.db.InsertRec(lattice.Bpragmatic, key, rec)
1207  	stageTag := srcDomain | (GenDictionary << 4)
1208  	t.metaSet(ri, MetaEntry{Count: 1, StageTag: stageTag})
1209  	m := t.metaGet(ri)
1210  	if m != nil {
1211  		m.Extra[0] = uint8(dstRole)
1212  		m.Extra[1] = dstDomain
1213  		m.Extra[4] = uint8(srcRole)
1214  		if t.BulkMetaStore != nil {
1215  			t.BulkMetaStore.dirty[ri] = true
1216  		}
1217  	}
1218  }
1219