hpack.mx raw

   1  // Copyright 2014 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package hpack implements HPACK, a compression format for
   6  // efficiently representing HTTP header fields in the context of HTTP/2.
   7  //
   8  // See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
   9  package hpack
  10  
  11  import (
  12  	"bytes"
  13  	"errors"
  14  	"fmt"
  15  )
  16  
  17  // A DecodingError is something the spec defines as a decoding error.
  18  type DecodingError struct {
  19  	Err error
  20  }
  21  
  22  func (de DecodingError) Error() string {
  23  	return fmt.Sprintf("decoding error: %v", de.Err)
  24  }
  25  
  26  // An InvalidIndexError is returned when an encoder references a table
  27  // entry before the static table or after the end of the dynamic table.
  28  type InvalidIndexError int
  29  
  30  func (e InvalidIndexError) Error() string {
  31  	return fmt.Sprintf("invalid indexed representation index %d", int(e))
  32  }
  33  
  34  // A HeaderField is a name-value pair. Both the name and value are
  35  // treated as opaque sequences of octets.
  36  type HeaderField struct {
  37  	Name, Value []byte
  38  
  39  	// Sensitive means that this header field should never be
  40  	// indexed.
  41  	Sensitive bool
  42  }
  43  
  44  // IsPseudo reports whether the header field is an http2 pseudo header.
  45  // That is, it reports whether it starts with a colon.
  46  // It is not otherwise guaranteed to be a valid pseudo header field,
  47  // though.
  48  func (hf HeaderField) IsPseudo() bool {
  49  	return len(hf.Name) != 0 && hf.Name[0] == ':'
  50  }
  51  
  52  func (hf HeaderField) String() string {
  53  	var suffix string
  54  	if hf.Sensitive {
  55  		suffix = " (sensitive)"
  56  	}
  57  	return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
  58  }
  59  
  60  // Size returns the size of an entry per RFC 7541 section 4.1.
  61  func (hf HeaderField) Size() uint32 {
  62  	// https://httpwg.org/specs/rfc7541.html#rfc.section.4.1
  63  	// "The size of the dynamic table is the sum of the size of
  64  	// its entries. The size of an entry is the sum of its name's
  65  	// length in octets (as defined in Section 5.2), its value's
  66  	// length in octets (see Section 5.2), plus 32.  The size of
  67  	// an entry is calculated using the length of the name and
  68  	// value without any Huffman encoding applied."
  69  
  70  	// This can overflow if somebody makes a large HeaderField
  71  	// Name and/or Value by hand, but we don't care, because that
  72  	// won't happen on the wire because the encoding doesn't allow
  73  	// it.
  74  	return uint32(len(hf.Name) + len(hf.Value) + 32)
  75  }
  76  
  77  // A Decoder is the decoding context for incremental processing of
  78  // header blocks.
  79  type Decoder struct {
  80  	dynTab dynamicTable
  81  	emit   func(f HeaderField)
  82  
  83  	emitEnabled bool // whether calls to emit are enabled
  84  	maxStrLen   int  // 0 means unlimited
  85  
  86  	// buf is the unparsed buffer. It's only written to
  87  	// saveBuf if it was truncated in the middle of a header
  88  	// block. Because it's usually not owned, we can only
  89  	// process it under Write.
  90  	buf []byte // not owned; only valid during Write
  91  
  92  	// saveBuf is previous data passed to Write which we weren't able
  93  	// to fully parse before. Unlike buf, we own this data.
  94  	saveBuf bytes.Buffer
  95  
  96  	firstField bool // processing the first field of the header block
  97  }
  98  
  99  // NewDecoder returns a new decoder with the provided maximum dynamic
 100  // table size. The emitFunc will be called for each valid field
 101  // parsed, in the same goroutine as calls to Write, before Write returns.
 102  func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
 103  	d := &Decoder{
 104  		emit:        emitFunc,
 105  		emitEnabled: true,
 106  		firstField:  true,
 107  	}
 108  	d.dynTab.table.init()
 109  	d.dynTab.allowedMaxSize = maxDynamicTableSize
 110  	d.dynTab.setMaxSize(maxDynamicTableSize)
 111  	return d
 112  }
 113  
 114  // ErrStringLength is returned by Decoder.Write when the max string length
 115  // (as configured by Decoder.SetMaxStringLength) would be violated.
 116  var ErrStringLength = errors.New("hpack: string too long")
 117  
 118  // SetMaxStringLength sets the maximum size of a HeaderField name or
 119  // value string. If a string exceeds this length (even after any
 120  // decompression), Write will return ErrStringLength.
 121  // A value of 0 means unlimited and is the default from NewDecoder.
 122  func (d *Decoder) SetMaxStringLength(n int) {
 123  	d.maxStrLen = n
 124  }
 125  
 126  // SetEmitFunc changes the callback used when new header fields
 127  // are decoded.
 128  // It must be non-nil. It does not affect EmitEnabled.
 129  func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
 130  	d.emit = emitFunc
 131  }
 132  
 133  // SetEmitEnabled controls whether the emitFunc provided to NewDecoder
 134  // should be called. The default is true.
 135  //
 136  // This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
 137  // while still decoding and keeping in-sync with decoder state, but
 138  // without doing unnecessary decompression or generating unnecessary
 139  // garbage for header fields past the limit.
 140  func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
 141  
 142  // EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
 143  // are currently enabled. The default is true.
 144  func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
 145  
 146  // TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
 147  // underlying buffers for garbage reasons.
 148  
 149  func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
 150  	d.dynTab.setMaxSize(v)
 151  }
 152  
 153  // SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
 154  // stream (via dynamic table size updates) may set the maximum size
 155  // to.
 156  func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
 157  	d.dynTab.allowedMaxSize = v
 158  }
 159  
 160  type dynamicTable struct {
 161  	// https://httpwg.org/specs/rfc7541.html#rfc.section.2.3.2
 162  	table          headerFieldTable
 163  	size           uint32 // in bytes
 164  	maxSize        uint32 // current maxSize
 165  	allowedMaxSize uint32 // maxSize may go up to this, inclusive
 166  }
 167  
 168  func (dt *dynamicTable) setMaxSize(v uint32) {
 169  	dt.maxSize = v
 170  	dt.evict()
 171  }
 172  
 173  func (dt *dynamicTable) add(f HeaderField) {
 174  	dt.table.addEntry(f)
 175  	dt.size += f.Size()
 176  	dt.evict()
 177  }
 178  
 179  // If we're too big, evict old stuff.
 180  func (dt *dynamicTable) evict() {
 181  	var n int
 182  	for dt.size > dt.maxSize && n < dt.table.len() {
 183  		dt.size -= dt.table.ents[n].Size()
 184  		n++
 185  	}
 186  	dt.table.evictOldest(n)
 187  }
 188  
 189  func (d *Decoder) maxTableIndex() int {
 190  	// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
 191  	// the dynamic table to 2^32 bytes, where each entry will occupy more than
 192  	// one byte. Further, the staticTable has a fixed, small length.
 193  	return d.dynTab.table.len() + staticTable.len()
 194  }
 195  
 196  func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
 197  	// See Section 2.3.3.
 198  	if i == 0 {
 199  		return
 200  	}
 201  	if i <= uint64(staticTable.len()) {
 202  		return staticTable.ents[i-1], true
 203  	}
 204  	if i > uint64(d.maxTableIndex()) {
 205  		return
 206  	}
 207  	// In the dynamic table, newer entries have lower indices.
 208  	// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
 209  	// the reversed dynamic table.
 210  	dt := d.dynTab.table
 211  	return dt.ents[dt.len()-(int(i)-staticTable.len())], true
 212  }
 213  
 214  // DecodeFull decodes an entire block.
 215  //
 216  // TODO: remove this method and make it incremental later? This is
 217  // easier for debugging now.
 218  func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
 219  	var hf []HeaderField
 220  	saveFunc := d.emit
 221  	defer func() { d.emit = saveFunc }()
 222  	d.emit = func(f HeaderField) { hf = append(hf, f) }
 223  	if _, err := d.Write(p); err != nil {
 224  		return nil, err
 225  	}
 226  	if err := d.Close(); err != nil {
 227  		return nil, err
 228  	}
 229  	return hf, nil
 230  }
 231  
 232  // Close declares that the decoding is complete and resets the Decoder
 233  // to be reused again for a new header block. If there is any remaining
 234  // data in the decoder's buffer, Close returns an error.
 235  func (d *Decoder) Close() error {
 236  	if d.saveBuf.Len() > 0 {
 237  		d.saveBuf.Reset()
 238  		return DecodingError{errors.New("truncated headers")}
 239  	}
 240  	d.firstField = true
 241  	return nil
 242  }
 243  
 244  func (d *Decoder) Write(p []byte) (n int, err error) {
 245  	if len(p) == 0 {
 246  		// Prevent state machine CPU attacks (making us redo
 247  		// work up to the point of finding out we don't have
 248  		// enough data)
 249  		return
 250  	}
 251  	// Only copy the data if we have to. Optimistically assume
 252  	// that p will contain a complete header block.
 253  	if d.saveBuf.Len() == 0 {
 254  		d.buf = p
 255  	} else {
 256  		d.saveBuf.Write(p)
 257  		d.buf = d.saveBuf.Bytes()
 258  		d.saveBuf.Reset()
 259  	}
 260  
 261  	for len(d.buf) > 0 {
 262  		err = d.parseHeaderFieldRepr()
 263  		if err == errNeedMore {
 264  			// Extra paranoia, making sure saveBuf won't
 265  			// get too large. All the varint and string
 266  			// reading code earlier should already catch
 267  			// overlong things and return ErrStringLength,
 268  			// but keep this as a last resort.
 269  			const varIntOverhead = 8 // conservative
 270  			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
 271  				return 0, ErrStringLength
 272  			}
 273  			d.saveBuf.Write(d.buf)
 274  			return len(p), nil
 275  		}
 276  		d.firstField = false
 277  		if err != nil {
 278  			break
 279  		}
 280  	}
 281  	return len(p), err
 282  }
 283  
 284  // errNeedMore is an internal sentinel error value that means the
 285  // buffer is truncated and we need to read more data before we can
 286  // continue parsing.
 287  var errNeedMore = errors.New("need more data")
 288  
 289  type indexType int
 290  
 291  const (
 292  	indexedTrue indexType = iota
 293  	indexedFalse
 294  	indexedNever
 295  )
 296  
 297  func (v indexType) indexed() bool   { return v == indexedTrue }
 298  func (v indexType) sensitive() bool { return v == indexedNever }
 299  
 300  // returns errNeedMore if there isn't enough data available.
 301  // any other error is fatal.
 302  // consumes d.buf iff it returns nil.
 303  // precondition: must be called with len(d.buf) > 0
 304  func (d *Decoder) parseHeaderFieldRepr() error {
 305  	b := d.buf[0]
 306  	switch {
 307  	case b&128 != 0:
 308  		// Indexed representation.
 309  		// High bit set?
 310  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.1
 311  		return d.parseFieldIndexed()
 312  	case b&192 == 64:
 313  		// 6.2.1 Literal Header Field with Incremental Indexing
 314  		// 0b10xxxxxx: top two bits are 10
 315  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.1
 316  		return d.parseFieldLiteral(6, indexedTrue)
 317  	case b&240 == 0:
 318  		// 6.2.2 Literal Header Field without Indexing
 319  		// 0b0000xxxx: top four bits are 0000
 320  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.2
 321  		return d.parseFieldLiteral(4, indexedFalse)
 322  	case b&240 == 16:
 323  		// 6.2.3 Literal Header Field never Indexed
 324  		// 0b0001xxxx: top four bits are 0001
 325  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.2.3
 326  		return d.parseFieldLiteral(4, indexedNever)
 327  	case b&224 == 32:
 328  		// 6.3 Dynamic Table Size Update
 329  		// Top three bits are '001'.
 330  		// https://httpwg.org/specs/rfc7541.html#rfc.section.6.3
 331  		return d.parseDynamicTableSizeUpdate()
 332  	}
 333  
 334  	return DecodingError{errors.New("invalid encoding")}
 335  }
 336  
 337  // (same invariants and behavior as parseHeaderFieldRepr)
 338  func (d *Decoder) parseFieldIndexed() error {
 339  	buf := d.buf
 340  	idx, buf, err := readVarInt(7, buf)
 341  	if err != nil {
 342  		return err
 343  	}
 344  	hf, ok := d.at(idx)
 345  	if !ok {
 346  		return DecodingError{InvalidIndexError(idx)}
 347  	}
 348  	d.buf = buf
 349  	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
 350  }
 351  
 352  // (same invariants and behavior as parseHeaderFieldRepr)
 353  func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
 354  	buf := d.buf
 355  	nameIdx, buf, err := readVarInt(n, buf)
 356  	if err != nil {
 357  		return err
 358  	}
 359  
 360  	var hf HeaderField
 361  	wantStr := d.emitEnabled || it.indexed()
 362  	var undecodedName undecodedString
 363  	if nameIdx > 0 {
 364  		ihf, ok := d.at(nameIdx)
 365  		if !ok {
 366  			return DecodingError{InvalidIndexError(nameIdx)}
 367  		}
 368  		hf.Name = ihf.Name
 369  	} else {
 370  		undecodedName, buf, err = d.readString(buf)
 371  		if err != nil {
 372  			return err
 373  		}
 374  	}
 375  	undecodedValue, buf, err := d.readString(buf)
 376  	if err != nil {
 377  		return err
 378  	}
 379  	if wantStr {
 380  		if nameIdx <= 0 {
 381  			hf.Name, err = d.decodeString(undecodedName)
 382  			if err != nil {
 383  				return err
 384  			}
 385  		}
 386  		hf.Value, err = d.decodeString(undecodedValue)
 387  		if err != nil {
 388  			return err
 389  		}
 390  	}
 391  	d.buf = buf
 392  	if it.indexed() {
 393  		d.dynTab.add(hf)
 394  	}
 395  	hf.Sensitive = it.sensitive()
 396  	return d.callEmit(hf)
 397  }
 398  
 399  func (d *Decoder) callEmit(hf HeaderField) error {
 400  	if d.maxStrLen != 0 {
 401  		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
 402  			return ErrStringLength
 403  		}
 404  	}
 405  	if d.emitEnabled {
 406  		d.emit(hf)
 407  	}
 408  	return nil
 409  }
 410  
 411  // (same invariants and behavior as parseHeaderFieldRepr)
 412  func (d *Decoder) parseDynamicTableSizeUpdate() error {
 413  	// RFC 7541, sec 4.2: This dynamic table size update MUST occur at the
 414  	// beginning of the first header block following the change to the dynamic table size.
 415  	if !d.firstField && d.dynTab.size > 0 {
 416  		return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")}
 417  	}
 418  
 419  	buf := d.buf
 420  	size, buf, err := readVarInt(5, buf)
 421  	if err != nil {
 422  		return err
 423  	}
 424  	if size > uint64(d.dynTab.allowedMaxSize) {
 425  		return DecodingError{errors.New("dynamic table size update too large")}
 426  	}
 427  	d.dynTab.setMaxSize(uint32(size))
 428  	d.buf = buf
 429  	return nil
 430  }
 431  
 432  var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
 433  
 434  // readVarInt reads an unsigned variable length integer off the
 435  // beginning of p. n is the parameter as described in
 436  // https://httpwg.org/specs/rfc7541.html#rfc.section.5.1.
 437  //
 438  // n must always be between 1 and 8.
 439  //
 440  // The returned remain buffer is either a smaller suffix of p, or err != nil.
 441  // The error is errNeedMore if p doesn't contain a complete integer.
 442  func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
 443  	if n < 1 || n > 8 {
 444  		panic("bad n")
 445  	}
 446  	if len(p) == 0 {
 447  		return 0, p, errNeedMore
 448  	}
 449  	i = uint64(p[0])
 450  	if n < 8 {
 451  		i &= (1 << uint64(n)) - 1
 452  	}
 453  	if i < (1<<uint64(n))-1 {
 454  		return i, p[1:], nil
 455  	}
 456  
 457  	origP := p
 458  	p = p[1:]
 459  	var m uint64
 460  	for len(p) > 0 {
 461  		b := p[0]
 462  		p = p[1:]
 463  		i += uint64(b&127) << m
 464  		if b&128 == 0 {
 465  			return i, p, nil
 466  		}
 467  		m += 7
 468  		if m >= 63 { // TODO: proper overflow check. making this up.
 469  			return 0, origP, errVarintOverflow
 470  		}
 471  	}
 472  	return 0, origP, errNeedMore
 473  }
 474  
 475  // readString reads an hpack string from p.
 476  //
 477  // It returns a reference to the encoded string data to permit deferring decode costs
 478  // until after the caller verifies all data is present.
 479  func (d *Decoder) readString(p []byte) (u undecodedString, remain []byte, err error) {
 480  	if len(p) == 0 {
 481  		return u, p, errNeedMore
 482  	}
 483  	isHuff := p[0]&128 != 0
 484  	strLen, p, err := readVarInt(7, p)
 485  	if err != nil {
 486  		return u, p, err
 487  	}
 488  	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
 489  		// Returning an error here means Huffman decoding errors
 490  		// for non-indexed strings past the maximum string length
 491  		// are ignored, but the server is returning an error anyway
 492  		// and because the string is not indexed the error will not
 493  		// affect the decoding state.
 494  		return u, nil, ErrStringLength
 495  	}
 496  	if uint64(len(p)) < strLen {
 497  		return u, p, errNeedMore
 498  	}
 499  	u.isHuff = isHuff
 500  	u.b = p[:strLen]
 501  	return u, p[strLen:], nil
 502  }
 503  
 504  type undecodedString struct {
 505  	isHuff bool
 506  	b      []byte
 507  }
 508  
 509  func (d *Decoder) decodeString(u undecodedString) ([]byte, error) {
 510  	if !u.isHuff {
 511  		return []byte(u.b), nil
 512  	}
 513  	buf := bufPool.Get().(*bytes.Buffer)
 514  	buf.Reset() // don't trust others
 515  	var s []byte
 516  	err := huffmanDecode(buf, d.maxStrLen, u.b)
 517  	if err == nil {
 518  		s = buf.String()
 519  	}
 520  	buf.Reset() // be nice to GC
 521  	bufPool.Put(buf)
 522  	return s, err
 523  }
 524