reader.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  /*
   6  Package zlib implements reading and writing of zlib format compressed data,
   7  as specified in RFC 1950.
   8  
   9  The implementation provides filters that uncompress during reading
  10  and compress during writing.  For example, to write compressed data
  11  to a buffer:
  12  
  13  	var b bytes.Buffer
  14  	w := zlib.NewWriter(&b)
  15  	w.Write([]byte("hello, world\n"))
  16  	w.Close()
  17  
  18  and to read that data back:
  19  
  20  	r, err := zlib.NewReader(&b)
  21  	io.Copy(os.Stdout, r)
  22  	r.Close()
  23  */
  24  package zlib
  25  
  26  import (
  27  	"bufio"
  28  	"compress/flate"
  29  	"encoding/binary"
  30  	"errors"
  31  	"hash"
  32  	"hash/adler32"
  33  	"io"
  34  )
  35  
  36  const (
  37  	zlibDeflate   = 8
  38  	zlibMaxWindow = 7
  39  )
  40  
  41  var (
  42  	// ErrChecksum is returned when reading ZLIB data that has an invalid checksum.
  43  	ErrChecksum = errors.New("zlib: invalid checksum")
  44  	// ErrDictionary is returned when reading ZLIB data that has an invalid dictionary.
  45  	ErrDictionary = errors.New("zlib: invalid dictionary")
  46  	// ErrHeader is returned when reading ZLIB data that has an invalid header.
  47  	ErrHeader = errors.New("zlib: invalid header")
  48  )
  49  
  50  type reader struct {
  51  	r            flate.Reader
  52  	decompressor io.ReadCloser
  53  	digest       hash.Hash32
  54  	err          error
  55  	scratch      [4]byte
  56  }
  57  
  58  // Resetter resets a ReadCloser returned by [NewReader] or [NewReaderDict]
  59  // to switch to a new underlying Reader. This permits reusing a ReadCloser
  60  // instead of allocating a new one.
  61  type Resetter interface {
  62  	// Reset discards any buffered data and resets the Resetter as if it was
  63  	// newly initialized with the given reader.
  64  	Reset(r io.Reader, dict []byte) error
  65  }
  66  
  67  // NewReader creates a new ReadCloser.
  68  // Reads from the returned ReadCloser read and decompress data from r.
  69  // If r does not implement [io.ByteReader], the decompressor may read more
  70  // data than necessary from r.
  71  // It is the caller's responsibility to call Close on the ReadCloser when done.
  72  //
  73  // The [io.ReadCloser] returned by NewReader also implements [Resetter].
  74  func NewReader(r io.Reader) (io.ReadCloser, error) {
  75  	return NewReaderDict(r, nil)
  76  }
  77  
  78  // NewReaderDict is like [NewReader] but uses a preset dictionary.
  79  // NewReaderDict ignores the dictionary if the compressed data does not refer to it.
  80  // If the compressed data refers to a different dictionary, NewReaderDict returns [ErrDictionary].
  81  //
  82  // The ReadCloser returned by NewReaderDict also implements [Resetter].
  83  func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
  84  	z := &reader{}
  85  	err := z.Reset(r, dict)
  86  	if err != nil {
  87  		return nil, err
  88  	}
  89  	return z, nil
  90  }
  91  
  92  func (z *reader) Read(p []byte) (int, error) {
  93  	if z.err != nil {
  94  		return 0, z.err
  95  	}
  96  
  97  	var n int
  98  	n, z.err = z.decompressor.Read(p)
  99  	z.digest.Write(p[0:n])
 100  	if z.err != io.EOF {
 101  		// In the normal case we return here.
 102  		return n, z.err
 103  	}
 104  
 105  	// Finished file; check checksum.
 106  	if _, err := io.ReadFull(z.r, z.scratch[0:4]); err != nil {
 107  		if err == io.EOF {
 108  			err = io.ErrUnexpectedEOF
 109  		}
 110  		z.err = err
 111  		return n, z.err
 112  	}
 113  	// ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
 114  	checksum := binary.BigEndian.Uint32(z.scratch[:4])
 115  	if checksum != z.digest.Sum32() {
 116  		z.err = ErrChecksum
 117  		return n, z.err
 118  	}
 119  	return n, io.EOF
 120  }
 121  
 122  // Calling Close does not close the wrapped [io.Reader] originally passed to [NewReader].
 123  // In order for the ZLIB checksum to be verified, the reader must be
 124  // fully consumed until the [io.EOF].
 125  func (z *reader) Close() error {
 126  	if z.err != nil && z.err != io.EOF {
 127  		return z.err
 128  	}
 129  	z.err = z.decompressor.Close()
 130  	return z.err
 131  }
 132  
 133  func (z *reader) Reset(r io.Reader, dict []byte) error {
 134  	*z = reader{decompressor: z.decompressor}
 135  	if fr, ok := r.(flate.Reader); ok {
 136  		z.r = fr
 137  	} else {
 138  		z.r = bufio.NewReader(r)
 139  	}
 140  
 141  	// Read the header (RFC 1950 section 2.2.).
 142  	_, z.err = io.ReadFull(z.r, z.scratch[0:2])
 143  	if z.err != nil {
 144  		if z.err == io.EOF {
 145  			z.err = io.ErrUnexpectedEOF
 146  		}
 147  		return z.err
 148  	}
 149  	h := binary.BigEndian.Uint16(z.scratch[:2])
 150  	if (z.scratch[0]&0x0f != zlibDeflate) || (z.scratch[0]>>4 > zlibMaxWindow) || (h%31 != 0) {
 151  		z.err = ErrHeader
 152  		return z.err
 153  	}
 154  	haveDict := z.scratch[1]&0x20 != 0
 155  	if haveDict {
 156  		_, z.err = io.ReadFull(z.r, z.scratch[0:4])
 157  		if z.err != nil {
 158  			if z.err == io.EOF {
 159  				z.err = io.ErrUnexpectedEOF
 160  			}
 161  			return z.err
 162  		}
 163  		checksum := binary.BigEndian.Uint32(z.scratch[:4])
 164  		if checksum != adler32.Checksum(dict) {
 165  			z.err = ErrDictionary
 166  			return z.err
 167  		}
 168  	}
 169  
 170  	if z.decompressor == nil {
 171  		if haveDict {
 172  			z.decompressor = flate.NewReaderDict(z.r, dict)
 173  		} else {
 174  			z.decompressor = flate.NewReader(z.r)
 175  		}
 176  	} else {
 177  		z.decompressor.(flate.Resetter).Reset(z.r, dict)
 178  	}
 179  	z.digest = adler32.New()
 180  	return nil
 181  }
 182