decodeheader.go raw

   1  // Copyright 2020+ Klaus Post. All rights reserved.
   2  // License information can be found in the LICENSE file.
   3  
   4  package zstd
   5  
   6  import (
   7  	"encoding/binary"
   8  	"errors"
   9  	"io"
  10  )
  11  
// HeaderMaxSize is the maximum size of a Frame and Block Header.
// If less is sent to Header.Decode it *may* still contain enough information.
//
// NOTE(review): DecodeAndStrip can read up to 4 (magic number) +
// 1 (frame header descriptor) + 1 (window descriptor) + 4 (dictionary ID) +
// 8 (frame content size) bytes of frame header, followed by a 3 byte block
// header. That is more than 14 + 3, so presumably the 14 does not account for
// every optional field together with the magic number - confirm before
// relying on this value as a hard upper bound.
const HeaderMaxSize = 14 + 3
  15  
// Header contains information about the first frame and block within that.
// All fields are reset and repopulated by every call to Decode or
// DecodeAndStrip.
type Header struct {
	// SingleSegment specifies whether the data is to be decompressed into a
	// single contiguous memory segment.
	// It is taken from bit 5 of the frame header descriptor.
	// When set, no window descriptor is read, so WindowSize is left at 0,
	// and a frame content size field is always read, so HasFCS is true.
	SingleSegment bool

	// WindowSize is the window of data to keep while decoding.
	// Will only be set if SingleSegment is false.
	WindowSize uint64

	// DictionaryID is the ID of the dictionary referenced by the frame,
	// read as a 1, 2 or 4 byte little-endian value.
	// If 0, no dictionary.
	DictionaryID uint32

	// HasFCS specifies whether FrameContentSize has a valid value.
	HasFCS bool

	// FrameContentSize is the expected uncompressed size of the entire frame.
	// Only meaningful when HasFCS is true.
	FrameContentSize uint64

	// Skippable will be true if the frame is meant to be skipped.
	// This implies that FirstBlock.OK is false; decoding stops right after
	// the skippable frame's size field, so no other frame fields are set.
	Skippable bool

	// SkippableID is the user-specific ID for the skippable frame,
	// taken from the low 4 bits of the first magic number byte.
	// Valid values are between 0 to 15, inclusive.
	SkippableID int

	// SkippableSize is the length of the user data to skip following
	// the header.
	SkippableSize uint32

	// HeaderSize is the raw size of the frame header.
	//
	// For normal frames, it includes the size of the magic number and
	// the size of the header (per section 3.1.1.1).
	// It does not include the size for any data blocks (section 3.1.1.2) nor
	// the size for the trailing content checksum.
	//
	// For skippable frames, this counts the size of the magic number
	// along with the size of the size field of the payload.
	// It does not include the size of the skippable payload itself.
	// The total frame size is the HeaderSize plus the SkippableSize.
	HeaderSize int

	// FirstBlock holds information from the 3 byte header of the first
	// block, when enough input was supplied to read it.
	FirstBlock struct {
		// OK will be set if first block could be decoded.
		// It stays false when fewer than 3 bytes follow the frame header
		// or when the block type is the reserved one.
		OK bool

		// Last reports whether this is the last block of the frame.
		Last bool

		// Compressed reports whether the block data is stored in a form
		// other than raw. It is set for both compressed and RLE blocks.
		// If true CompressedSize will be populated.
		// For compressed blocks DecompressedSize cannot be determined
		// without decoding the block.
		Compressed bool

		// DecompressedSize is the expected decompressed size of the block.
		// It is set for raw and RLE blocks.
		// Will be 0 if it cannot be determined.
		DecompressedSize int

		// CompressedSize of the data in the block.
		// Does not include the block header.
		// Will be equal to DecompressedSize if not Compressed,
		// and is 1 for RLE blocks.
		CompressedSize int
	}

	// HasCheckSum is set from bit 2 of the frame header descriptor and
	// indicates that a content checksum follows the frame data.
	// The checksum field at the end is always 4 bytes long.
	HasCheckSum bool
}
  90  
  91  // Decode the header from the beginning of the stream.
  92  // This will decode the frame header and the first block header if enough bytes are provided.
  93  // It is recommended to provide at least HeaderMaxSize bytes.
  94  // If the frame header cannot be read an error will be returned.
  95  // If there isn't enough input, io.ErrUnexpectedEOF is returned.
  96  // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
  97  func (h *Header) Decode(in []byte) error {
  98  	_, err := h.DecodeAndStrip(in)
  99  	return err
 100  }
 101  
 102  // DecodeAndStrip will decode the header from the beginning of the stream
 103  // and on success return the remaining bytes.
 104  // This will decode the frame header and the first block header if enough bytes are provided.
 105  // It is recommended to provide at least HeaderMaxSize bytes.
 106  // If the frame header cannot be read an error will be returned.
 107  // If there isn't enough input, io.ErrUnexpectedEOF is returned.
 108  // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
 109  func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
 110  	*h = Header{}
 111  	if len(in) < 4 {
 112  		return nil, io.ErrUnexpectedEOF
 113  	}
 114  	h.HeaderSize += 4
 115  	b, in := in[:4], in[4:]
 116  	if string(b) != frameMagic {
 117  		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
 118  			return nil, ErrMagicMismatch
 119  		}
 120  		if len(in) < 4 {
 121  			return nil, io.ErrUnexpectedEOF
 122  		}
 123  		h.HeaderSize += 4
 124  		h.Skippable = true
 125  		h.SkippableID = int(b[0] & 0xf)
 126  		h.SkippableSize = binary.LittleEndian.Uint32(in)
 127  		return in[4:], nil
 128  	}
 129  
 130  	// Read Window_Descriptor
 131  	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
 132  	if len(in) < 1 {
 133  		return nil, io.ErrUnexpectedEOF
 134  	}
 135  	fhd, in := in[0], in[1:]
 136  	h.HeaderSize++
 137  	h.SingleSegment = fhd&(1<<5) != 0
 138  	h.HasCheckSum = fhd&(1<<2) != 0
 139  	if fhd&(1<<3) != 0 {
 140  		return nil, errors.New("reserved bit set on frame header")
 141  	}
 142  
 143  	if !h.SingleSegment {
 144  		if len(in) < 1 {
 145  			return nil, io.ErrUnexpectedEOF
 146  		}
 147  		var wd byte
 148  		wd, in = in[0], in[1:]
 149  		h.HeaderSize++
 150  		windowLog := 10 + (wd >> 3)
 151  		windowBase := uint64(1) << windowLog
 152  		windowAdd := (windowBase / 8) * uint64(wd&0x7)
 153  		h.WindowSize = windowBase + windowAdd
 154  	}
 155  
 156  	// Read Dictionary_ID
 157  	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
 158  	if size := fhd & 3; size != 0 {
 159  		if size == 3 {
 160  			size = 4
 161  		}
 162  		if len(in) < int(size) {
 163  			return nil, io.ErrUnexpectedEOF
 164  		}
 165  		b, in = in[:size], in[size:]
 166  		h.HeaderSize += int(size)
 167  		switch len(b) {
 168  		case 1:
 169  			h.DictionaryID = uint32(b[0])
 170  		case 2:
 171  			h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
 172  		case 4:
 173  			h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
 174  		}
 175  	}
 176  
 177  	// Read Frame_Content_Size
 178  	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
 179  	var fcsSize int
 180  	v := fhd >> 6
 181  	switch v {
 182  	case 0:
 183  		if h.SingleSegment {
 184  			fcsSize = 1
 185  		}
 186  	default:
 187  		fcsSize = 1 << v
 188  	}
 189  
 190  	if fcsSize > 0 {
 191  		h.HasFCS = true
 192  		if len(in) < fcsSize {
 193  			return nil, io.ErrUnexpectedEOF
 194  		}
 195  		b, in = in[:fcsSize], in[fcsSize:]
 196  		h.HeaderSize += int(fcsSize)
 197  		switch len(b) {
 198  		case 1:
 199  			h.FrameContentSize = uint64(b[0])
 200  		case 2:
 201  			// When FCS_Field_Size is 2, the offset of 256 is added.
 202  			h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
 203  		case 4:
 204  			h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
 205  		case 8:
 206  			d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
 207  			d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
 208  			h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
 209  		}
 210  	}
 211  
 212  	// Frame Header done, we will not fail from now on.
 213  	if len(in) < 3 {
 214  		return in, nil
 215  	}
 216  	tmp := in[:3]
 217  	bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
 218  	h.FirstBlock.Last = bh&1 != 0
 219  	blockType := blockType((bh >> 1) & 3)
 220  	// find size.
 221  	cSize := int(bh >> 3)
 222  	switch blockType {
 223  	case blockTypeReserved:
 224  		return in, nil
 225  	case blockTypeRLE:
 226  		h.FirstBlock.Compressed = true
 227  		h.FirstBlock.DecompressedSize = cSize
 228  		h.FirstBlock.CompressedSize = 1
 229  	case blockTypeCompressed:
 230  		h.FirstBlock.Compressed = true
 231  		h.FirstBlock.CompressedSize = cSize
 232  	case blockTypeRaw:
 233  		h.FirstBlock.DecompressedSize = cSize
 234  		h.FirstBlock.CompressedSize = cSize
 235  	default:
 236  		panic("Invalid block type")
 237  	}
 238  
 239  	h.FirstBlock.OK = true
 240  	return in, nil
 241  }
 242  
 243  // AppendTo will append the encoded header to the dst slice.
 244  // There is no error checking performed on the header values.
 245  func (h *Header) AppendTo(dst []byte) ([]byte, error) {
 246  	if h.Skippable {
 247  		magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
 248  		magic[0] |= byte(h.SkippableID & 0xf)
 249  		dst = append(dst, magic[:]...)
 250  		f := h.SkippableSize
 251  		return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
 252  	}
 253  	f := frameHeader{
 254  		ContentSize:   h.FrameContentSize,
 255  		WindowSize:    uint32(h.WindowSize),
 256  		SingleSegment: h.SingleSegment,
 257  		Checksum:      h.HasCheckSum,
 258  		DictID:        h.DictionaryID,
 259  	}
 260  	return f.appendTo(dst), nil
 261  }
 262