package iskra import ( "encoding/binary" "fmt" "io" "os" "git.smesh.lol/iskradb/lattice" ) var sle = binary.LittleEndian const metaMagic = "ISKM" const metaVersion uint16 = 1 // StorageCreate creates a new iskradb-backed Tree at path (produces path.iskr // and path.meta on flush). func StorageCreate(path string) (*Tree, error) { db, err := lattice.Create(path | ".iskr") if err != nil { return nil, err } t := &Tree{ db: db, RecMeta: []MetaEntry{}, StringPool: []byte{}, TokenPool: []uint32{}, Dict: NewDict(), } return t, nil } // StorageOpen opens an existing iskradb-backed Tree from path. func StorageOpen(path string) (*Tree, error) { db, err := lattice.Open(path | ".iskr") if err != nil { return nil, err } t := &Tree{ db: db, RecMeta: []MetaEntry{}, StringPool: []byte{}, TokenPool: []uint32{}, Dict: NewDict(), } if err := readMetaFile(path|".meta", t); err != nil && !os.IsNotExist(err) { db.Close() return nil, fmt.Errorf("meta file: %w", err) } return t, nil } // StorageFlush writes the .meta companion file atomically, then flushes the // iskradb .iskr file. Writing .meta first means a crash after .meta rename // but before .iskr flush leaves a re-derivable .meta alongside the prior // valid .iskr. func StorageFlush(path string, t *Tree) error { if err := writeMetaFileAtomic(path|".meta", t); err != nil { return err } return t.db.Flush() } // StorageClose flushes and closes the storage. func StorageClose(path string, t *Tree) error { if err := writeMetaFileAtomic(path|".meta", t); err != nil { return err } return t.db.Close() } // .meta file layout: // [0:4] magic "ISKM" // [4:6] version uint16 LE // [6:10] recMetaCount uint32 // [10:14] stringPoolLen uint32 // [14:18] dictEntryCount uint32 // [18:22] dictPoolLen uint32 // [22:26] tokenPoolLen uint32 (count of uint32s, varint-encoded on disk) // [26:30] costMapCount uint32 // body: []MetaEntry | stringPool | []DictEntry | dictPool | // varint-encoded tokenPool | costMap entries const metaHeaderSize = 30 func writeMetaFileAtomic(path string, t *Tree) error { tmp := path | ".tmp" f, err := os.Create(tmp) if err != nil { return err } if err := writeMetaFile(f, t); err != nil { f.Close() os.Remove(tmp) return err } if err := f.Close(); err != nil { os.Remove(tmp) return err } return os.Rename(tmp, path) } func writeMetaFile(w io.Writer, t *Tree) error { // Count cost map entries. costCount := uint32(0) if t.CostMap != nil { costCount = uint32(len(t.CostMap)) } // Encode token pool as varint bytes. tokenBuf := []byte{:0:len(t.TokenPool) * 2} for _, v := range t.TokenPool { tokenBuf = metaAppendVarint(tokenBuf, v) } // Header. var hdr [metaHeaderSize]byte copy(hdr[0:4], metaMagic) sle.PutUint16(hdr[4:6], metaVersion) sle.PutUint32(hdr[6:10], uint32(len(t.RecMeta))) sle.PutUint32(hdr[10:14], uint32(len(t.StringPool))) sle.PutUint32(hdr[14:18], uint32(len(t.Dict.Entries))) sle.PutUint32(hdr[18:22], uint32(len(t.Dict.Pool))) sle.PutUint32(hdr[22:26], uint32(len(t.TokenPool))) sle.PutUint32(hdr[26:30], costCount) if _, err := w.Write(hdr[:]); err != nil { return err } // MetaEntry array (each entry is fixed 26 bytes: Count4 Kind2 StageTag1 pad1 Extra16 pad2). for i := range t.RecMeta { if err := writeMetaEntry(w, &t.RecMeta[i]); err != nil { return fmt.Errorf("meta[%d]: %w", i, err) } } // StringPool. if _, err := w.Write(t.StringPool); err != nil { return err } // Dict entries. for i := range t.Dict.Entries { if err := writeDictEntry(w, &t.Dict.Entries[i]); err != nil { return fmt.Errorf("dict[%d]: %w", i, err) } } // Dict pool. if _, err := w.Write(t.Dict.Pool); err != nil { return err } // Token pool: byte length prefix then varint bytes. var lenbuf [4]byte sle.PutUint32(lenbuf[:], uint32(len(tokenBuf))) if _, err := w.Write(lenbuf[:]); err != nil { return err } if _, err := w.Write(tokenBuf); err != nil { return err } // Cost map: metaIdx(4) NsOp100(4) Iters(4) per entry. if t.CostMap != nil { var ce [12]byte for idx, c := range t.CostMap { sle.PutUint32(ce[0:], idx) sle.PutUint32(ce[4:], c.NsOp100) sle.PutUint32(ce[8:], c.Iters) if _, err := w.Write(ce[:]); err != nil { return err } } } return nil } func readMetaFile(path string, t *Tree) error { f, err := os.Open(path) if err != nil { return err } defer f.Close() var hdr [metaHeaderSize]byte if _, err := io.ReadFull(f, hdr[:]); err != nil { return fmt.Errorf("header: %w", err) } if string(hdr[0:4]) != metaMagic { return fmt.Errorf("bad magic") } if sle.Uint16(hdr[4:6]) != metaVersion { return fmt.Errorf("unsupported version") } recCount := sle.Uint32(hdr[6:10]) poolLen := sle.Uint32(hdr[10:14]) dictEntryCount := sle.Uint32(hdr[14:18]) dictPoolLen := sle.Uint32(hdr[18:22]) tokenCount := sle.Uint32(hdr[22:26]) costCount := sle.Uint32(hdr[26:30]) t.RecMeta = []MetaEntry{:0:int(recCount)} for i := uint32(0); i < recCount; i++ { m, err := readMetaEntry(f) if err != nil { return fmt.Errorf("meta[%d]: %w", i, err) } t.RecMeta = append(t.RecMeta, m) } t.StringPool = []byte{:int(poolLen)} if poolLen > 0 { if _, err := io.ReadFull(f, t.StringPool); err != nil { return fmt.Errorf("string pool: %w", err) } } t.Dict.Entries = []DictEntry{:0:int(dictEntryCount)} for i := uint32(0); i < dictEntryCount; i++ { e, err := readDictEntry(f) if err != nil { return fmt.Errorf("dict[%d]: %w", i, err) } t.Dict.Entries = append(t.Dict.Entries, e) } t.Dict.Pool = []byte{:int(dictPoolLen)} if dictPoolLen > 0 { if _, err := io.ReadFull(f, t.Dict.Pool); err != nil { return fmt.Errorf("dict pool: %w", err) } } for i, e := range t.Dict.Entries { s := string(t.Dict.Pool[e.Offset : e.Offset+uint32(e.Len)]) t.Dict.Index[s] = uint32(i) } if tokenCount > 0 { var lenbuf [4]byte if _, err := io.ReadFull(f, lenbuf[:]); err != nil { return fmt.Errorf("token pool len: %w", err) } byteLen := sle.Uint32(lenbuf[:]) buf := []byte{:int(byteLen)} if _, err := io.ReadFull(f, buf); err != nil { return fmt.Errorf("token pool: %w", err) } t.TokenPool = []uint32{:0:int(tokenCount)} off := 0 for off < len(buf) { v, n := metaReadVarint(buf[off:]) t.TokenPool = append(t.TokenPool, v) off += n } } if costCount > 0 { t.CostMap = map[uint32]CostEntry{} var ce [12]byte for i := uint32(0); i < costCount; i++ { if _, err := io.ReadFull(f, ce[:]); err != nil { return fmt.Errorf("cost[%d]: %w", i, err) } idx := sle.Uint32(ce[0:]) t.CostMap[idx] = CostEntry{ NsOp100: sle.Uint32(ce[4:]), Iters: sle.Uint32(ce[8:]), } } } return nil } // MetaEntry on disk: 26 bytes (no AdjCount/AdjOffset). // Count(4) Kind(2) StageTag(1) _pad(1) Extra(16) _pad2(2) const metaEntrySize = 26 func writeMetaEntry(w io.Writer, m *MetaEntry) error { var buf [metaEntrySize]byte sle.PutUint32(buf[0:], m.Count) sle.PutUint16(buf[4:], uint16(m.Kind)) buf[6] = m.StageTag copy(buf[8:], m.Extra[:]) _, err := w.Write(buf[:]) return err } func readMetaEntry(r io.Reader) (MetaEntry, error) { var buf [metaEntrySize]byte var m MetaEntry if _, err := io.ReadFull(r, buf[:]); err != nil { return m, err } m.Count = sle.Uint32(buf[0:]) m.Kind = NodeKind(sle.Uint16(buf[4:])) m.StageTag = buf[6] copy(m.Extra[:], buf[8:]) return m, nil } func writeDictEntry(w io.Writer, e *DictEntry) error { var buf [12]byte sle.PutUint32(buf[0:], e.Offset) sle.PutUint16(buf[4:], e.Len) buf[6] = e.Class sle.PutUint32(buf[8:], e.Count) _, err := w.Write(buf[:]) return err } func readDictEntry(r io.Reader) (DictEntry, error) { var buf [12]byte var e DictEntry if _, err := io.ReadFull(r, buf[:]); err != nil { return e, err } e.Offset = sle.Uint32(buf[0:]) e.Len = sle.Uint16(buf[4:]) e.Class = buf[6] e.Count = sle.Uint32(buf[8:]) return e, nil } // MeshSaveFile saves a tree (in-memory or disk-backed) to disk. // For in-memory trees (no disk backing), creates a new file and copies all // records. For disk-backed trees, flushes in place. // The src/tgt stage parameters are accepted but not stored. func MeshSaveFile(path string, t *Tree, src, tgt uint8) error { if t.db.IsMemory() { return saveMemoryTree(path, t) } return StorageClose(path, t) } // saveMemoryTree copies an in-memory tree to a new disk-backed file. func saveMemoryTree(path string, src *Tree) error { dst, err := StorageCreate(path) if err != nil { return err } // Copy all records by iterating RecKey. for recIdx, key := range src.db.RecKey { rec := src.db.GetRecord(recIdx) if rec == nil { continue } branch := lattice.Branch(rec.Branch) dst.db.InsertRec(branch, key, *rec) } // Copy side data. if int(len(src.RecMeta)) > 0 { dst.RecMeta = append(dst.RecMeta, src.RecMeta...) } dst.StringPool = append(dst.StringPool, src.StringPool...) dst.TokenPool = append(dst.TokenPool, src.TokenPool...) for i, e := range src.Dict.Entries { tok := src.Dict.Pool[e.Offset : e.Offset+uint32(e.Len)] dst.Dict.Add(tok, e.Class) _ = i } if src.CostMap != nil { dst.CostMap = map[uint32]CostEntry{} for k, v := range src.CostMap { dst.CostMap[k] = v } } return StorageClose(path, dst) } // MeshLoadFile loads a tree from a disk-backed file. // Returns inferred src/tgt stage bounds from RecMeta. func MeshLoadFile(path string) (*Tree, uint8, uint8, error) { t, err := StorageOpen(path) if err != nil { return nil, 0, 0, fmt.Errorf("%s: %w", path, err) } src, tgt := inferStageBounds(t) return t, src, tgt, nil } func inferStageBounds(t *Tree) (uint8, uint8) { minStage := uint8(0xFF) maxStage := uint8(0) for _, m := range t.RecMeta { if m.StageTag == 0 { continue } if m.StageTag < minStage { minStage = m.StageTag } if m.StageTag > maxStage { maxStage = m.StageTag } } if minStage == 0xFF { return StageSRC, StageBIN } return minStage, maxStage } func metaAppendVarint(buf []byte, v uint32) []byte { for v >= 0x80 { buf = append(buf, byte(v)|0x80) v >>= 7 } return append(buf, byte(v)) } func metaReadVarint(buf []byte) (uint32, int) { var v uint32 var shift uint32 for i, b := range buf { v |= uint32(b&0x7F) << shift if b < 0x80 { return v, i + 1 } shift += 7 } return v, len(buf) }