gzip.mx raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package gzip
   6  
   7  import (
   8  	"compress/flate"
   9  	"errors"
  10  	"fmt"
  11  	"hash/crc32"
  12  	"io"
  13  	"time"
  14  )
  15  
// These constants are copied from the [flate] package, so that code that imports
// [compress/gzip] does not also have to import [compress/flate].
//
// Each one is a compression level that may be passed to [NewWriterLevel],
// which rejects any level below HuffmanOnly or above BestCompression.
const (
	NoCompression      = flate.NoCompression
	BestSpeed          = flate.BestSpeed
	BestCompression    = flate.BestCompression
	DefaultCompression = flate.DefaultCompression
	HuffmanOnly        = flate.HuffmanOnly
)
  25  
// A Writer is an [io.WriteCloser].
// Writes to a Writer are compressed and written to w.
//
// The gzip header is emitted lazily, on the first call to Write, Flush, or
// Close, so the embedded [Header] fields must be set before that call.
type Writer struct {
	Header                    // written at first call to Write, Flush, or Close
	w           io.Writer     // underlying writer; receives the header, compressed data, and trailer
	level       int           // compression level passed to flate.NewWriter
	wroteHeader bool          // set by Write just before it starts emitting the header
	closed      bool          // set by Close; once set, Flush and Close return nil without doing work
	buf         [10]byte      // scratch space for the fixed header, length prefixes, NUL terminators, and trailer
	compressor  *flate.Writer // created on the first Write; retained and Reset when the Writer is reinitialized
	digest      uint32        // CRC-32, IEEE polynomial (section 8)
	size        uint32        // Uncompressed size (section 2.3.1)
	err         error         // last error recorded; while non-nil, Write, Flush, and Close return it immediately
}
  40  
  41  // NewWriter returns a new [Writer].
  42  // Writes to the returned writer are compressed and written to w.
  43  //
  44  // It is the caller's responsibility to call Close on the [Writer] when done.
  45  // Writes may be buffered and not flushed until Close.
  46  //
  47  // Callers that wish to set the fields in Writer.[Header] must do so before
  48  // the first call to Write, Flush, or Close.
  49  func NewWriter(w io.Writer) *Writer {
  50  	z, _ := NewWriterLevel(w, DefaultCompression)
  51  	return z
  52  }
  53  
  54  // NewWriterLevel is like [NewWriter] but specifies the compression level instead
  55  // of assuming [DefaultCompression].
  56  //
  57  // The compression level can be [DefaultCompression], [NoCompression], [HuffmanOnly]
  58  // or any integer value between [BestSpeed] and [BestCompression] inclusive.
  59  // The error returned will be nil if the level is valid.
  60  func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
  61  	if level < HuffmanOnly || level > BestCompression {
  62  		return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
  63  	}
  64  	z := &Writer{}
  65  	z.init(w, level)
  66  	return z, nil
  67  }
  68  
  69  func (z *Writer) init(w io.Writer, level int) {
  70  	compressor := z.compressor
  71  	if compressor != nil {
  72  		compressor.Reset(w)
  73  	}
  74  	*z = Writer{
  75  		Header: Header{
  76  			OS: 255, // unknown
  77  		},
  78  		w:          w,
  79  		level:      level,
  80  		compressor: compressor,
  81  	}
  82  }
  83  
  84  // Reset discards the [Writer] z's state and makes it equivalent to the
  85  // result of its original state from [NewWriter] or [NewWriterLevel], but
  86  // writing to w instead. This permits reusing a [Writer] rather than
  87  // allocating a new one.
  88  func (z *Writer) Reset(w io.Writer) {
  89  	z.init(w, z.level)
  90  }
  91  
  92  // writeBytes writes a length-prefixed byte slice to z.w.
  93  func (z *Writer) writeBytes(b []byte) error {
  94  	if len(b) > 0xffff {
  95  		return errors.New("gzip.Write: Extra data is too large")
  96  	}
  97  	le.PutUint16(z.buf[:2], uint16(len(b)))
  98  	_, err := z.w.Write(z.buf[:2])
  99  	if err != nil {
 100  		return err
 101  	}
 102  	_, err = z.w.Write(b)
 103  	return err
 104  }
 105  
 106  // writeString writes a UTF-8 string s in GZIP's format to z.w.
 107  // GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
 108  func (z *Writer) writeString(s []byte) (err error) {
 109  	// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
 110  	needconv := false
 111  	for _, v := range string(s) {
 112  		if v == 0 || v > 0xff {
 113  			return errors.New("gzip.Write: non-Latin-1 header string")
 114  		}
 115  		if v > 0x7f {
 116  			needconv = true
 117  		}
 118  	}
 119  	if needconv {
 120  		b := []byte{:0:len(s)}
 121  		for _, v := range string(s) {
 122  			b = append(b, byte(v))
 123  		}
 124  		_, err = z.w.Write(b)
 125  	} else {
 126  		_, err = z.w.Write(s)
 127  	}
 128  	if err != nil {
 129  		return err
 130  	}
 131  	// GZIP strings are NUL-terminated.
 132  	z.buf[0] = 0
 133  	_, err = z.w.Write(z.buf[:1])
 134  	return err
 135  }
 136  
 137  // Write writes a compressed form of p to the underlying [io.Writer]. The
 138  // compressed bytes are not necessarily flushed until the [Writer] is closed.
 139  func (z *Writer) Write(p []byte) (int, error) {
 140  	if z.err != nil {
 141  		return 0, z.err
 142  	}
 143  	var n int
 144  	// Write the GZIP header lazily.
 145  	if !z.wroteHeader {
 146  		z.wroteHeader = true
 147  		z.buf = [10]byte{0: gzipID1, 1: gzipID2, 2: gzipDeflate}
 148  		if z.Extra != nil {
 149  			z.buf[3] |= 0x04
 150  		}
 151  		if z.Name != "" {
 152  			z.buf[3] |= 0x08
 153  		}
 154  		if z.Comment != "" {
 155  			z.buf[3] |= 0x10
 156  		}
 157  		if z.ModTime.After(time.Unix(0, 0)) {
 158  			// Section 2.3.1, the zero value for MTIME means that the
 159  			// modified time is not set.
 160  			le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix()))
 161  		}
 162  		if z.level == BestCompression {
 163  			z.buf[8] = 2
 164  		} else if z.level == BestSpeed {
 165  			z.buf[8] = 4
 166  		}
 167  		z.buf[9] = z.OS
 168  		_, z.err = z.w.Write(z.buf[:10])
 169  		if z.err != nil {
 170  			return 0, z.err
 171  		}
 172  		if z.Extra != nil {
 173  			z.err = z.writeBytes(z.Extra)
 174  			if z.err != nil {
 175  				return 0, z.err
 176  			}
 177  		}
 178  		if z.Name != "" {
 179  			z.err = z.writeString(z.Name)
 180  			if z.err != nil {
 181  				return 0, z.err
 182  			}
 183  		}
 184  		if z.Comment != "" {
 185  			z.err = z.writeString(z.Comment)
 186  			if z.err != nil {
 187  				return 0, z.err
 188  			}
 189  		}
 190  		if z.compressor == nil {
 191  			z.compressor, _ = flate.NewWriter(z.w, z.level)
 192  		}
 193  	}
 194  	z.size += uint32(len(p))
 195  	z.digest = crc32.Update(z.digest, crc32.IEEETable, p)
 196  	n, z.err = z.compressor.Write(p)
 197  	return n, z.err
 198  }
 199  
 200  // Flush flushes any pending compressed data to the underlying writer.
 201  //
 202  // It is useful mainly in compressed network protocols, to ensure that
 203  // a remote reader has enough data to reconstruct a packet. Flush does
 204  // not return until the data has been written. If the underlying
 205  // writer returns an error, Flush returns that error.
 206  //
 207  // In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
 208  func (z *Writer) Flush() error {
 209  	if z.err != nil {
 210  		return z.err
 211  	}
 212  	if z.closed {
 213  		return nil
 214  	}
 215  	if !z.wroteHeader {
 216  		z.Write(nil)
 217  		if z.err != nil {
 218  			return z.err
 219  		}
 220  	}
 221  	z.err = z.compressor.Flush()
 222  	return z.err
 223  }
 224  
 225  // Close closes the [Writer] by flushing any unwritten data to the underlying
 226  // [io.Writer] and writing the GZIP footer.
 227  // It does not close the underlying [io.Writer].
 228  func (z *Writer) Close() error {
 229  	if z.err != nil {
 230  		return z.err
 231  	}
 232  	if z.closed {
 233  		return nil
 234  	}
 235  	z.closed = true
 236  	if !z.wroteHeader {
 237  		z.Write(nil)
 238  		if z.err != nil {
 239  			return z.err
 240  		}
 241  	}
 242  	z.err = z.compressor.Close()
 243  	if z.err != nil {
 244  		return z.err
 245  	}
 246  	le.PutUint32(z.buf[:4], z.digest)
 247  	le.PutUint32(z.buf[4:8], z.size)
 248  	_, z.err = z.w.Write(z.buf[:8])
 249  	return z.err
 250  }
 251