package lattice import ( "fmt" "os" ) type cacheEntry struct { N Node Ref bool } type recEntry struct { R Record Ref bool } type PageCache struct { F *os.File NodeOff int64 RecOff int64 NodeCap uint32 RecCap uint32 Loaded map[uint32]*cacheEntry Dirty map[uint32]bool RecLoaded map[uint32]*recEntry RecDirty map[uint32]bool MaxNodes int Clock []uint32 ClockHand int } const defaultMaxNodes = 4096 func newPageCache(f *os.File, nodeCap, recCap uint32) *PageCache { return &PageCache{ F: f, NodeOff: int64(headerSize), RecOff: int64(headerSize) + int64(nodeCap)*nodeSize, NodeCap: nodeCap, RecCap: recCap, Loaded: map[uint32]*cacheEntry{}, Dirty: map[uint32]bool{}, RecLoaded: map[uint32]*recEntry{}, RecDirty: map[uint32]bool{}, MaxNodes: defaultMaxNodes, Clock: []uint32{:0:64}, } } func CreateStore(path string) (*PageCache, error) { f, err := os.Create(path) if err != nil { return nil, err } nodeCap := uint32(64) recCap := uint32(128) pc := newPageCache(f, nodeCap, recCap) t := &Tree{ nCount: C, FreeHead: NullIndex, FreeCount: 0, } for i := 0; i < C; i++ { t.Roots[i] = uint32(i) } buf := writeHeaderBuf(t, nodeCap, recCap) if _, err := f.WriteAt(buf[:], 0); err != nil { f.Close() return nil, fmt.Errorf("write header: %w", err) } for i := 0; i < C; i++ { var n Node initNodeChildren(&n) n.SetBranch(Branch(i)) n.Mult = 1 nbuf := writeNodeBuf(&n) off := pc.NodeOff + int64(i)*nodeSize if _, err := f.WriteAt(nbuf[:], off); err != nil { f.Close() return nil, fmt.Errorf("write root %d: %w", i, err) } } totalSize := pc.RecOff + int64(recCap)*48 if err := f.Truncate(totalSize); err != nil { f.Close() return nil, fmt.Errorf("truncate: %w", err) } if err := f.Sync(); err != nil { f.Close() return nil, fmt.Errorf("sync: %w", err) } return pc, nil } func OpenStore(path string) (*PageCache, *iskrHeader, error) { f, err := os.OpenFile(path, os.O_RDWR, 0) if err != nil { return nil, nil, err } var buf [headerSize]byte if _, err := f.ReadAt(buf[:], 0); err != nil { f.Close() return nil, nil, fmt.Errorf("read header: %w", err) } hdr, err := readHeaderBuf(buf) if err != nil { f.Close() return nil, nil, err } pc := newPageCache(f, hdr.nodeCap, hdr.recCap) return pc, &hdr, nil } func (pc *PageCache) GetNode(idx uint32) *Node { if e, ok := pc.Loaded[idx]; ok { e.Ref = true return &e.N } var buf [nodeSize]byte off := pc.NodeOff + int64(idx)*nodeSize pc.F.ReadAt(buf[:], off) n := readNodeBuf(buf) e := &cacheEntry{N: n, Ref: true} pc.Loaded[idx] = e pc.Clock = append(pc.Clock, idx) if len(pc.Loaded) > pc.MaxNodes { pc.evict() } return &e.N } func (pc *PageCache) MarkNodeDirty(idx uint32) { pc.Dirty[idx] = true } func (pc *PageCache) GetRecord(idx uint32) *Record { if e, ok := pc.RecLoaded[idx]; ok { e.Ref = true return &e.R } var buf [48]byte off := pc.RecOff + int64(idx)*48 pc.F.ReadAt(buf[:], off) r := readRecBuf(buf) e := &recEntry{R: r, Ref: true} pc.RecLoaded[idx] = e return &e.R } func (pc *PageCache) MarkRecDirty(idx uint32) { pc.RecDirty[idx] = true } func (pc *PageCache) AllocNode(nCount *uint32) uint32 { if *nCount >= pc.NodeCap { pc.growNodes() } idx := *nCount *nCount++ var n Node initNodeChildren(&n) e := &cacheEntry{N: n, Ref: true} pc.Loaded[idx] = e pc.Dirty[idx] = true pc.Clock = append(pc.Clock, idx) return idx } func (pc *PageCache) AllocRecord(rCount *uint32) uint32 { if *rCount >= pc.RecCap { pc.growRecords() } idx := *rCount *rCount++ e := &recEntry{R: Record{}, Ref: true} pc.RecLoaded[idx] = e pc.RecDirty[idx] = true return idx } func (pc *PageCache) Flush(t *Tree) error { for idx := range pc.Dirty { e, ok := pc.Loaded[idx] if !ok { continue } buf := writeNodeBuf(&e.N) off := pc.NodeOff + int64(idx)*nodeSize if _, err := pc.F.WriteAt(buf[:], off); err != nil { return fmt.Errorf("write node %d: %w", idx, err) } } for idx := range pc.RecDirty { e, ok := pc.RecLoaded[idx] if !ok { continue } buf := writeRecBuf(&e.R) off := pc.RecOff + int64(idx)*48 if _, err := pc.F.WriteAt(buf[:], off); err != nil { return fmt.Errorf("write record %d: %w", idx, err) } } // write pending journal after used records pendOff := pc.RecOff + int64(t.rCount)*48 for i := range t.Pending { var buf [4]byte buf[0] = uint8(t.Pending[i].Branch) le.PutUint16(buf[1:3], t.Pending[i].Depth) buf[3] = t.Pending[i].Factor if _, err := pc.F.WriteAt(buf[:], pendOff+int64(i)*4); err != nil { return fmt.Errorf("write pending %d: %w", i, err) } } // commit point: write header hbuf := writeHeaderBuf(t, pc.NodeCap, pc.RecCap) if _, err := pc.F.WriteAt(hbuf[:], 0); err != nil { return fmt.Errorf("write header: %w", err) } if err := pc.F.Sync(); err != nil { return fmt.Errorf("sync: %w", err) } // clear dirty maps for k := range pc.Dirty { delete(pc.Dirty, k) } for k := range pc.RecDirty { delete(pc.RecDirty, k) } return nil } func (pc *PageCache) evict() { passes := len(pc.Clock) * 2 evicted := 0 for i := 0; i < passes && evicted == 0; i++ { if len(pc.Clock) == 0 { return } pc.ClockHand = pc.ClockHand % len(pc.Clock) idx := pc.Clock[pc.ClockHand] e, ok := pc.Loaded[idx] if !ok { pc.Clock = append(pc.Clock[:pc.ClockHand], pc.Clock[pc.ClockHand+1:]...) continue } if pc.Dirty[idx] { pc.ClockHand++ continue } if e.Ref { e.Ref = false pc.ClockHand++ continue } delete(pc.Loaded, idx) pc.Clock = append(pc.Clock[:pc.ClockHand], pc.Clock[pc.ClockHand+1:]...) evicted++ } } func (pc *PageCache) Close(t *Tree) error { if err := pc.Flush(t); err != nil { return err } return pc.F.Close() } // growNodes doubles the node region capacity. // Records must be moved backward (high addresses first) to avoid corruption // on crash. A mid-copy crash with backward movement leaves either the old // layout (header not yet updated) or the new layout (header updated after // move), both consistent. func (pc *PageCache) growNodes() { // flush dirty records before moving them for idx := range pc.RecDirty { e, ok := pc.RecLoaded[idx] if !ok { continue } buf := writeRecBuf(&e.R) off := pc.RecOff + int64(idx)*48 pc.F.WriteAt(buf[:], off) } for k := range pc.RecDirty { delete(pc.RecDirty, k) } newCap := pc.NodeCap * 2 if newCap < pc.NodeCap+64 { newCap = pc.NodeCap + 64 } oldRecOff := pc.RecOff newRecOff := int64(headerSize) + int64(newCap)*80 recBytes := int64(pc.RecCap) * 48 // extend file first totalSize := newRecOff + recBytes pc.F.Truncate(totalSize) // move records backward (high to low source offset -> high to low dest offset) var chunk [4096]byte remaining := recBytes for remaining > 0 { n := remaining if n > 4096 { n = 4096 } remaining -= n srcOff := oldRecOff + remaining dstOff := newRecOff + remaining pc.F.ReadAt(chunk[:n], srcOff) pc.F.WriteAt(chunk[:n], dstOff) } pc.NodeCap = newCap pc.RecOff = newRecOff // invalidate record cache (file offsets changed) for k := range pc.RecLoaded { delete(pc.RecLoaded, k) } } func (pc *PageCache) growRecords() { newCap := pc.RecCap * 2 if newCap < pc.RecCap+128 { newCap = pc.RecCap + 128 } totalSize := pc.RecOff + int64(newCap)*48 pc.F.Truncate(totalSize) pc.RecCap = newCap }