storage.mx raw

   1  package ingest
   2  
   3  import (
   4  	"encoding/binary"
   5  	"fmt"
   6  	"io"
   7  	"os"
   8  
   9  	"git.smesh.lol/iskradb/lattice"
  10  )
  11  
  12  var le = binary.LittleEndian
  13  
  14  // SaveFlat writes the DB to a flat binary file.
  15  // Format (C=8):
  16  //   [4B]     nodeCount
  17  //   [4B]     recCount
  18  //   [C×4B]   roots (one per axis branch)
  19  //   [4B]     poolLen
  20  //   [nodeCount × nodeSize]  nodes
  21  //   [recCount × 48B]        records
  22  //   [poolLen B]             string pool
  23  //   [recCount × 20B]        RecKey entries: recIdx(4) + key(16)
  24  func SaveFlat(db *DB, path string) error {
  25  	tmp := path | ".tmp"
  26  	f, err := os.Create(tmp)
  27  	if err != nil {
  28  		return err
  29  	}
  30  
  31  	nCount := uint32(db.Tree.NodeCount())
  32  	rCount := uint32(db.Tree.RecordCount())
  33  	poolLen := uint32(len(db.StringPool))
  34  
  35  	// Header: nCount(4) + rCount(4) + C roots(C*4) + poolLen(4)
  36  	hdrSize := 4 + 4 + lattice.C*4 + 4
  37  	hdr := []byte{:hdrSize:hdrSize}
  38  	le.PutUint32(hdr[0:], nCount)
  39  	le.PutUint32(hdr[4:], rCount)
  40  	for i := 0; i < lattice.C; i++ {
  41  		le.PutUint32(hdr[8+i*4:], db.Tree.RootIdx(lattice.Branch(i)))
  42  	}
  43  	le.PutUint32(hdr[8+lattice.C*4:], poolLen)
  44  	if _, err := f.Write(hdr); err != nil {
  45  		f.Close()
  46  		os.Remove(tmp)
  47  		return err
  48  	}
  49  
  50  	// Nodes: 64 bytes each.
  51  	for i := uint32(0); i < nCount; i++ {
  52  		if err := writeNode(f, db.Tree, i); err != nil {
  53  			f.Close()
  54  			os.Remove(tmp)
  55  			return fmt.Errorf("node %d: %w", i, err)
  56  		}
  57  	}
  58  
  59  	// Records: 48 bytes each.
  60  	for i := uint32(0); i < rCount; i++ {
  61  		if err := writeRecord(f, db.Tree, i); err != nil {
  62  			f.Close()
  63  			os.Remove(tmp)
  64  			return fmt.Errorf("rec %d: %w", i, err)
  65  		}
  66  	}
  67  
  68  	// String pool.
  69  	if _, err := f.Write(db.StringPool); err != nil {
  70  		f.Close()
  71  		os.Remove(tmp)
  72  		return err
  73  	}
  74  
  75  	// RecKey map: recIdx(4) + key(16) per entry (128-bit SipHash key).
  76  	var rkBuf [20]byte
  77  	for recIdx, key := range db.Tree.RecKey {
  78  		le.PutUint32(rkBuf[0:], recIdx)
  79  		le.PutUint64(rkBuf[4:], key[0])
  80  		le.PutUint64(rkBuf[12:], key[1])
  81  		if _, err := f.Write(rkBuf[:]); err != nil {
  82  			f.Close()
  83  			os.Remove(tmp)
  84  			return err
  85  		}
  86  	}
  87  
  88  	if err := f.Close(); err != nil {
  89  		os.Remove(tmp)
  90  		return err
  91  	}
  92  	return os.Rename(tmp, path)
  93  }
  94  
  95  // LoadFlatFromReader reads a DB from an io.Reader containing data written by SaveFlat.
  96  func LoadFlatFromReader(r io.Reader) (*DB, error) {
  97  	hdrSize := 4 + 4 + lattice.C*4 + 4
  98  	hdr := []byte{:hdrSize:hdrSize}
  99  	if _, err := io.ReadFull(r, hdr); err != nil {
 100  		return nil, fmt.Errorf("header: %w", err)
 101  	}
 102  	nCount := le.Uint32(hdr[0:])
 103  	rCount := le.Uint32(hdr[4:])
 104  	var roots [lattice.C]uint32
 105  	for i := 0; i < lattice.C; i++ {
 106  		roots[i] = le.Uint32(hdr[8+i*4:])
 107  	}
 108  	poolLen := le.Uint32(hdr[8+lattice.C*4:])
 109  
 110  	tree := lattice.AllocTree(nCount, rCount, roots)
 111  
 112  	for i := uint32(0); i < nCount; i++ {
 113  		if err := readNode(r, tree, i); err != nil {
 114  			return nil, fmt.Errorf("node %d: %w", i, err)
 115  		}
 116  	}
 117  
 118  	for i := uint32(0); i < rCount; i++ {
 119  		if err := readRecord(r, tree, i); err != nil {
 120  			return nil, fmt.Errorf("rec %d: %w", i, err)
 121  		}
 122  	}
 123  
 124  	pool := []byte{:int(poolLen)}
 125  	if poolLen > 0 {
 126  		if _, err := io.ReadFull(r, pool); err != nil {
 127  			return nil, fmt.Errorf("pool: %w", err)
 128  		}
 129  	}
 130  
 131  	var rkBuf [20]byte
 132  	for {
 133  		n, err := io.ReadFull(r, rkBuf[:])
 134  		if n == 0 || err == io.EOF || err == io.ErrUnexpectedEOF {
 135  			break
 136  		}
 137  		if err != nil {
 138  			return nil, fmt.Errorf("reckey: %w", err)
 139  		}
 140  		recIdx := le.Uint32(rkBuf[0:])
 141  		key := lattice.Key{le.Uint64(rkBuf[4:]), le.Uint64(rkBuf[12:])}
 142  		tree.RecKey[recIdx] = key
 143  	}
 144  
 145  	return &DB{Tree: tree, StringPool: pool}, nil
 146  }
 147  
 148  // LoadFlat reads a DB from a flat binary file written by SaveFlat.
 149  func LoadFlat(path string) (*DB, error) {
 150  	f, err := os.Open(path)
 151  	if err != nil {
 152  		return nil, err
 153  	}
 154  	defer f.Close()
 155  	return LoadFlatFromReader(f)
 156  }
 157  
 158  // Node: Keys[C](C*16B) RecPtrs[C](C*4B) Children[C+1]((C+1)*4B) Mult(1) Flags(1) pad(2)
 159  const flatNodeSize = lattice.C*16 + lattice.C*4 + (lattice.C+1)*4 + 4
 160  
 161  func writeNode(w io.Writer, tree *lattice.Tree, idx uint32) error {
 162  	node := tree.GetNode(idx)
 163  	buf := []byte{:flatNodeSize:flatNodeSize}
 164  	for i := 0; i < lattice.C; i++ {
 165  		le.PutUint64(buf[i*16:], node.Keys[i][0])
 166  		le.PutUint64(buf[i*16+8:], node.Keys[i][1])
 167  	}
 168  	base := lattice.C * 16
 169  	for i := 0; i < lattice.C; i++ {
 170  		le.PutUint32(buf[base+i*4:], node.RecPtrs[i])
 171  	}
 172  	base += lattice.C * 4
 173  	for i := 0; i < lattice.C+1; i++ {
 174  		le.PutUint32(buf[base+i*4:], node.Children[i])
 175  	}
 176  	base += (lattice.C + 1) * 4
 177  	buf[base] = node.Mult
 178  	buf[base+1] = node.Flags
 179  	_, err := w.Write(buf)
 180  	return err
 181  }
 182  
 183  func readNode(r io.Reader, tree *lattice.Tree, idx uint32) error {
 184  	buf := []byte{:flatNodeSize:flatNodeSize}
 185  	if _, err := io.ReadFull(r, buf); err != nil {
 186  		return err
 187  	}
 188  	node := tree.GetNode(idx)
 189  	for i := 0; i < lattice.C; i++ {
 190  		node.Keys[i][0] = le.Uint64(buf[i*16:])
 191  		node.Keys[i][1] = le.Uint64(buf[i*16+8:])
 192  	}
 193  	base := lattice.C * 16
 194  	for i := 0; i < lattice.C; i++ {
 195  		node.RecPtrs[i] = le.Uint32(buf[base+i*4:])
 196  	}
 197  	base += lattice.C * 4
 198  	for i := 0; i < lattice.C+1; i++ {
 199  		node.Children[i] = le.Uint32(buf[base+i*4:])
 200  	}
 201  	base += (lattice.C + 1) * 4
 202  	node.Mult = buf[base]
 203  	node.Flags = buf[base+1]
 204  	return nil
 205  }
 206  
 207  // Record: DataFile(4) DataOff(4) DataLen(4) Link[2](8) Branch(1) pad(3) Inline[24] = 48B
 208  func writeRecord(w io.Writer, tree *lattice.Tree, idx uint32) error {
 209  	rec := tree.GetRecord(idx)
 210  	var buf [48]byte
 211  	le.PutUint32(buf[0:], rec.DataFile)
 212  	le.PutUint32(buf[4:], rec.DataOff)
 213  	le.PutUint32(buf[8:], rec.DataLen)
 214  	le.PutUint32(buf[12:], rec.Link[0])
 215  	le.PutUint32(buf[16:], rec.Link[1])
 216  	buf[20] = rec.Branch
 217  	copy(buf[24:], rec.Inline[:])
 218  	_, err := w.Write(buf[:])
 219  	return err
 220  }
 221  
 222  func readRecord(r io.Reader, tree *lattice.Tree, idx uint32) error {
 223  	var buf [48]byte
 224  	if _, err := io.ReadFull(r, buf[:]); err != nil {
 225  		return err
 226  	}
 227  	rec := tree.GetRecord(idx)
 228  	rec.DataFile = le.Uint32(buf[0:])
 229  	rec.DataOff = le.Uint32(buf[4:])
 230  	rec.DataLen = le.Uint32(buf[8:])
 231  	rec.Link[0] = le.Uint32(buf[12:])
 232  	rec.Link[1] = le.Uint32(buf[16:])
 233  	rec.Branch = buf[20]
 234  	copy(rec.Inline[:], buf[24:])
 235  	return nil
 236  }
 237