chunked.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // The wire protocol for HTTP's "chunked" Transfer-Encoding.
   6  
   7  // Package internal contains HTTP internals shared by net/http and
   8  // net/http/httputil.
   9  package internal
  10  
  11  import (
  12  	"bufio"
  13  	"bytes"
  14  	"errors"
  15  	"fmt"
  16  	"io"
  17  )
  18  
// maxLineLength bounds the length of a chunk header line, not counting its
// trailing CRLF. readChunkLine rejects any line of this length or longer.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned by readChunkLine when a chunk header line
// reaches maxLineLength or does not fit in the bufio.Reader's buffer.
var ErrLineTooLong = errors.New("header line too long")
  22  
  23  // NewChunkedReader returns a new chunkedReader that translates the data read from r
  24  // out of HTTP "chunked" format before returning it.
  25  // The chunkedReader returns [io.EOF] when the final 0-length chunk is read.
  26  //
  27  // NewChunkedReader is not needed by normal applications. The http package
  28  // automatically decodes chunking when reading response bodies.
  29  func NewChunkedReader(r io.Reader) io.Reader {
  30  	br, ok := r.(*bufio.Reader)
  31  	if !ok {
  32  		br = bufio.NewReader(r)
  33  	}
  34  	return &chunkedReader{r: br}
  35  }
  36  
// chunkedReader is the io.Reader returned by NewChunkedReader. It reads
// chunk-encoded data from r and yields only the chunk payloads.
type chunkedReader struct {
	r        *bufio.Reader // buffered source of the chunk-encoded stream
	n        uint64        // unread bytes in chunk
	err      error         // sticky error; once non-nil, Read stops looping and returns it
	buf      [2]byte       // scratch space for the two bytes read after a chunk's data
	checkEnd bool          // whether need to check for \r\n chunk footer
	excess   int64         // "excessive" chunk overhead, for malicious sender detection
}
  45  
  46  func (cr *chunkedReader) beginChunk() {
  47  	// chunk-size CRLF
  48  	var line []byte
  49  	line, cr.err = readChunkLine(cr.r)
  50  	if cr.err != nil {
  51  		return
  52  	}
  53  	cr.excess += int64(len(line)) + 2 // header, plus \r\n after the chunk data
  54  	line = trimTrailingWhitespace(line)
  55  	line, cr.err = removeChunkExtension(line)
  56  	if cr.err != nil {
  57  		return
  58  	}
  59  	cr.n, cr.err = parseHexUint(line)
  60  	if cr.err != nil {
  61  		return
  62  	}
  63  	// A sender who sends one byte per chunk will send 5 bytes of overhead
  64  	// for every byte of data. ("1\r\nX\r\n" to send "X".)
  65  	// We want to allow this, since streaming a byte at a time can be legitimate.
  66  	//
  67  	// A sender can use chunk extensions to add arbitrary amounts of additional
  68  	// data per byte read. ("1;very long extension\r\nX\r\n" to send "X".)
  69  	// We don't want to disallow extensions (although we discard them),
  70  	// but we also don't want to allow a sender to reduce the signal/noise ratio
  71  	// arbitrarily.
  72  	//
  73  	// We track the amount of excess overhead read,
  74  	// and produce an error if it grows too large.
  75  	//
  76  	// Currently, we say that we're willing to accept 16 bytes of overhead per chunk,
  77  	// plus twice the amount of real data in the chunk.
  78  	cr.excess -= 16 + (2 * int64(cr.n))
  79  	cr.excess = max(cr.excess, 0)
  80  	if cr.excess > 16*1024 {
  81  		cr.err = errors.New("chunked encoding contains too much non-data")
  82  	}
  83  	if cr.n == 0 {
  84  		cr.err = io.EOF
  85  	}
  86  }
  87  
  88  func (cr *chunkedReader) chunkHeaderAvailable() bool {
  89  	n := cr.r.Buffered()
  90  	if n > 0 {
  91  		peek, _ := cr.r.Peek(n)
  92  		return bytes.IndexByte(peek, '\n') >= 0
  93  	}
  94  	return false
  95  }
  96  
  97  func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
  98  	for cr.err == nil {
  99  		if cr.checkEnd {
 100  			if n > 0 && cr.r.Buffered() < 2 {
 101  				// We have some data. Return early (per the io.Reader
 102  				// contract) instead of potentially blocking while
 103  				// reading more.
 104  				break
 105  			}
 106  			if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
 107  				if []byte(cr.buf[:]) != "\r\n" {
 108  					cr.err = errors.New("malformed chunked encoding")
 109  					break
 110  				}
 111  			} else {
 112  				if cr.err == io.EOF {
 113  					cr.err = io.ErrUnexpectedEOF
 114  				}
 115  				break
 116  			}
 117  			cr.checkEnd = false
 118  		}
 119  		if cr.n == 0 {
 120  			if n > 0 && !cr.chunkHeaderAvailable() {
 121  				// We've read enough. Don't potentially block
 122  				// reading a new chunk header.
 123  				break
 124  			}
 125  			cr.beginChunk()
 126  			continue
 127  		}
 128  		if len(b) == 0 {
 129  			break
 130  		}
 131  		rbuf := b
 132  		if uint64(len(rbuf)) > cr.n {
 133  			rbuf = rbuf[:cr.n]
 134  		}
 135  		var n0 int
 136  		n0, cr.err = cr.r.Read(rbuf)
 137  		n += n0
 138  		b = b[n0:]
 139  		cr.n -= uint64(n0)
 140  		// If we're at the end of a chunk, read the next two
 141  		// bytes to verify they are "\r\n".
 142  		if cr.n == 0 && cr.err == nil {
 143  			cr.checkEnd = true
 144  		} else if cr.err == io.EOF {
 145  			cr.err = io.ErrUnexpectedEOF
 146  		}
 147  	}
 148  	return n, cr.err
 149  }
 150  
 151  // Read a line of bytes (up to \n) from b.
 152  // Give up if the line exceeds maxLineLength.
 153  // The returned bytes are owned by the bufio.Reader
 154  // so they are only valid until the next bufio read.
 155  func readChunkLine(b *bufio.Reader) ([]byte, error) {
 156  	p, err := b.ReadSlice('\n')
 157  	if err != nil {
 158  		// We always know when EOF is coming.
 159  		// If the caller asked for a line, there should be a line.
 160  		if err == io.EOF {
 161  			err = io.ErrUnexpectedEOF
 162  		} else if err == bufio.ErrBufferFull {
 163  			err = ErrLineTooLong
 164  		}
 165  		return nil, err
 166  	}
 167  
 168  	// RFC 9112 permits parsers to accept a bare \n as a line ending in headers,
 169  	// but not in chunked encoding lines. See https://www.rfc-editor.org/errata/eid7633,
 170  	// which explicitly rejects a clarification permitting \n as a chunk terminator.
 171  	//
 172  	// Verify that the line ends in a CRLF, and that no CRs appear before the end.
 173  	if idx := bytes.IndexByte(p, '\r'); idx == -1 {
 174  		return nil, errors.New("chunked line ends with bare LF")
 175  	} else if idx != len(p)-2 {
 176  		return nil, errors.New("invalid CR in chunked line")
 177  	}
 178  	p = p[:len(p)-2] // trim CRLF
 179  
 180  	if len(p) >= maxLineLength {
 181  		return nil, ErrLineTooLong
 182  	}
 183  	return p, nil
 184  }
 185  
 186  func trimTrailingWhitespace(b []byte) []byte {
 187  	for len(b) > 0 && isOWS(b[len(b)-1]) {
 188  		b = b[:len(b)-1]
 189  	}
 190  	return b
 191  }
 192  
 193  func isOWS(b byte) bool {
 194  	return b == ' ' || b == '\t'
 195  }
 196  
 197  var semi = []byte(";")
 198  
 199  // removeChunkExtension removes any chunk-extension from p.
 200  // For example,
 201  //
 202  //	"0" => "0"
 203  //	"0;token" => "0"
 204  //	"0;token=val" => "0"
 205  //	`0;token="quoted string"` => "0"
 206  func removeChunkExtension(p []byte) ([]byte, error) {
 207  	p, _, _ = bytes.Cut(p, semi)
 208  	// TODO: care about exact syntax of chunk extensions? We're
 209  	// ignoring and stripping them anyway. For now just never
 210  	// return an error.
 211  	return p, nil
 212  }
 213  
 214  // NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
 215  // "chunked" format before writing them to w. Closing the returned chunkedWriter
 216  // sends the final 0-length chunk that marks the end of the stream but does
 217  // not send the final CRLF that appears after trailers; trailers and the last
 218  // CRLF must be written separately.
 219  //
 220  // NewChunkedWriter is not needed by normal applications. The http
 221  // package adds chunking automatically if handlers don't set a
 222  // Content-Length header. Using newChunkedWriter inside a handler
 223  // would result in double chunking or chunking with a Content-Length
 224  // length, both of which are wrong.
 225  func NewChunkedWriter(w io.Writer) io.WriteCloser {
 226  	return &chunkedWriter{w}
 227  }
 228  
// chunkedWriter is the io.WriteCloser returned by NewChunkedWriter.
// Data written to it is forwarded to Wire in HTTP chunked
// Transfer-Encoding wire format.
type chunkedWriter struct {
	Wire io.Writer // underlying writer that receives the encoded chunks
}
 234  
 235  // Write the contents of data as one chunk to Wire.
 236  // NOTE: Note that the corresponding chunk-writing procedure in Conn.Write has
 237  // a bug since it does not check for success of [io.WriteString]
 238  func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
 239  
 240  	// Don't send 0-length data. It looks like EOF for chunked encoding.
 241  	if len(data) == 0 {
 242  		return 0, nil
 243  	}
 244  
 245  	if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
 246  		return 0, err
 247  	}
 248  	if n, err = cw.Wire.Write(data); err != nil {
 249  		return
 250  	}
 251  	if n != len(data) {
 252  		err = io.ErrShortWrite
 253  		return
 254  	}
 255  	if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
 256  		return
 257  	}
 258  	if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
 259  		err = bw.Flush()
 260  	}
 261  	return
 262  }
 263  
 264  func (cw *chunkedWriter) Close() error {
 265  	_, err := io.WriteString(cw.Wire, "0\r\n")
 266  	return err
 267  }
 268  
// FlushAfterChunkWriter signals from the caller of [NewChunkedWriter]
// that each chunk should be followed by a flush. It is used by the
// [net/http.Transport] code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
//
// chunkedWriter.Write detects this type with a type assertion on its Wire
// field and calls Flush after writing each chunk.
type FlushAfterChunkWriter struct {
	*bufio.Writer // embedded buffered writer; supplies Write and Flush
}
 277  
 278  func parseHexUint(v []byte) (n uint64, err error) {
 279  	if len(v) == 0 {
 280  		return 0, errors.New("empty hex number for chunk length")
 281  	}
 282  	for i, b := range v {
 283  		switch {
 284  		case '0' <= b && b <= '9':
 285  			b = b - '0'
 286  		case 'a' <= b && b <= 'f':
 287  			b = b - 'a' + 10
 288  		case 'A' <= b && b <= 'F':
 289  			b = b - 'A' + 10
 290  		default:
 291  			return 0, errors.New("invalid byte in chunk length")
 292  		}
 293  		if i == 16 {
 294  			return 0, errors.New("http chunk length too large")
 295  		}
 296  		n <<= 4
 297  		n |= uint64(b)
 298  	}
 299  	return
 300  }
 301