1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // The wire protocol for HTTP's "chunked" Transfer-Encoding.
6 7 // Package internal contains HTTP internals shared by net/http and
8 // net/http/httputil.
9 package internal
10 11 import (
12 "bufio"
13 "bytes"
14 "errors"
15 "fmt"
16 "io"
17 )
// maxLineLength caps the length of a single chunk-size line; longer
// lines produce ErrLineTooLong.
const maxLineLength = 4096 // assumed <= bufio.defaultBufSize

// ErrLineTooLong is returned when reading a malformed chunked stream
// whose chunk-size line exceeds maxLineLength.
var ErrLineTooLong = errors.New("header line too long")
22 23 // NewChunkedReader returns a new chunkedReader that translates the data read from r
24 // out of HTTP "chunked" format before returning it.
25 // The chunkedReader returns [io.EOF] when the final 0-length chunk is read.
26 //
27 // NewChunkedReader is not needed by normal applications. The http package
28 // automatically decodes chunking when reading response bodies.
29 func NewChunkedReader(r io.Reader) io.Reader {
30 br, ok := r.(*bufio.Reader)
31 if !ok {
32 br = bufio.NewReader(r)
33 }
34 return &chunkedReader{r: br}
35 }
// chunkedReader decodes an HTTP/1.1 "chunked" transfer-encoded stream,
// implementing io.Reader over the raw payload bytes. Errors are sticky:
// once err is set, every subsequent Read returns it.
type chunkedReader struct {
	r        *bufio.Reader
	n        uint64 // unread bytes in chunk
	err      error  // sticky error (io.EOF after the final 0-length chunk)
	buf      [2]byte // scratch for verifying the \r\n after chunk data
	checkEnd bool // whether need to check for \r\n chunk footer
	excess   int64 // "excessive" chunk overhead, for malicious sender detection
}
// beginChunk reads and parses the next chunk-size line
// ("chunk-size [;chunk-extension] CRLF"), setting cr.n to the number of
// data bytes in the chunk. Failures are recorded in cr.err; a size of
// zero (the terminating chunk) is reported as io.EOF.
func (cr *chunkedReader) beginChunk() {
	// chunk-size CRLF
	var line []byte
	line, cr.err = readChunkLine(cr.r)
	if cr.err != nil {
		return
	}
	cr.excess += int64(len(line)) + 2 // header, plus \r\n after the chunk data
	line = trimTrailingWhitespace(line)
	line, cr.err = removeChunkExtension(line)
	if cr.err != nil {
		return
	}
	cr.n, cr.err = parseHexUint(line)
	if cr.err != nil {
		return
	}
	// A sender who sends one byte per chunk will send 5 bytes of overhead
	// for every byte of data. ("1\r\nX\r\n" to send "X".)
	// We want to allow this, since streaming a byte at a time can be legitimate.
	//
	// A sender can use chunk extensions to add arbitrary amounts of additional
	// data per byte read. ("1;very long extension\r\nX\r\n" to send "X".)
	// We don't want to disallow extensions (although we discard them),
	// but we also don't want to allow a sender to reduce the signal/noise ratio
	// arbitrarily.
	//
	// We track the amount of excess overhead read,
	// and produce an error if it grows too large.
	//
	// Currently, we say that we're willing to accept 16 bytes of overhead per chunk,
	// plus twice the amount of real data in the chunk.
	cr.excess -= 16 + (2 * int64(cr.n))
	cr.excess = max(cr.excess, 0)
	if cr.excess > 16*1024 {
		cr.err = errors.New("chunked encoding contains too much non-data")
	}
	if cr.n == 0 {
		cr.err = io.EOF
	}
}
87 88 func (cr *chunkedReader) chunkHeaderAvailable() bool {
89 n := cr.r.Buffered()
90 if n > 0 {
91 peek, _ := cr.r.Peek(n)
92 return bytes.IndexByte(peek, '\n') >= 0
93 }
94 return false
95 }
96 97 func (cr *chunkedReader) Read(b []uint8) (n int, err error) {
98 for cr.err == nil {
99 if cr.checkEnd {
100 if n > 0 && cr.r.Buffered() < 2 {
101 // We have some data. Return early (per the io.Reader
102 // contract) instead of potentially blocking while
103 // reading more.
104 break
105 }
106 if _, cr.err = io.ReadFull(cr.r, cr.buf[:2]); cr.err == nil {
107 if []byte(cr.buf[:]) != "\r\n" {
108 cr.err = errors.New("malformed chunked encoding")
109 break
110 }
111 } else {
112 if cr.err == io.EOF {
113 cr.err = io.ErrUnexpectedEOF
114 }
115 break
116 }
117 cr.checkEnd = false
118 }
119 if cr.n == 0 {
120 if n > 0 && !cr.chunkHeaderAvailable() {
121 // We've read enough. Don't potentially block
122 // reading a new chunk header.
123 break
124 }
125 cr.beginChunk()
126 continue
127 }
128 if len(b) == 0 {
129 break
130 }
131 rbuf := b
132 if uint64(len(rbuf)) > cr.n {
133 rbuf = rbuf[:cr.n]
134 }
135 var n0 int
136 n0, cr.err = cr.r.Read(rbuf)
137 n += n0
138 b = b[n0:]
139 cr.n -= uint64(n0)
140 // If we're at the end of a chunk, read the next two
141 // bytes to verify they are "\r\n".
142 if cr.n == 0 && cr.err == nil {
143 cr.checkEnd = true
144 } else if cr.err == io.EOF {
145 cr.err = io.ErrUnexpectedEOF
146 }
147 }
148 return n, cr.err
149 }
150 151 // Read a line of bytes (up to \n) from b.
152 // Give up if the line exceeds maxLineLength.
153 // The returned bytes are owned by the bufio.Reader
154 // so they are only valid until the next bufio read.
155 func readChunkLine(b *bufio.Reader) ([]byte, error) {
156 p, err := b.ReadSlice('\n')
157 if err != nil {
158 // We always know when EOF is coming.
159 // If the caller asked for a line, there should be a line.
160 if err == io.EOF {
161 err = io.ErrUnexpectedEOF
162 } else if err == bufio.ErrBufferFull {
163 err = ErrLineTooLong
164 }
165 return nil, err
166 }
167 168 // RFC 9112 permits parsers to accept a bare \n as a line ending in headers,
169 // but not in chunked encoding lines. See https://www.rfc-editor.org/errata/eid7633,
170 // which explicitly rejects a clarification permitting \n as a chunk terminator.
171 //
172 // Verify that the line ends in a CRLF, and that no CRs appear before the end.
173 if idx := bytes.IndexByte(p, '\r'); idx == -1 {
174 return nil, errors.New("chunked line ends with bare LF")
175 } else if idx != len(p)-2 {
176 return nil, errors.New("invalid CR in chunked line")
177 }
178 p = p[:len(p)-2] // trim CRLF
179 180 if len(p) >= maxLineLength {
181 return nil, ErrLineTooLong
182 }
183 return p, nil
184 }
// isOWS reports whether b is HTTP "optional whitespace" (SP or HTAB).
func isOWS(b byte) bool {
	switch b {
	case ' ', '\t':
		return true
	default:
		return false
	}
}

// trimTrailingWhitespace returns b with any trailing OWS bytes removed.
func trimTrailingWhitespace(b []byte) []byte {
	end := len(b)
	for end > 0 && isOWS(b[end-1]) {
		end--
	}
	return b[:end]
}
var semi = []byte(";")

// removeChunkExtension removes any chunk-extension from p.
// For example,
//
//	"0" => "0"
//	"0;token" => "0"
//	"0;token=val" => "0"
//	`0;token="quoted string"` => "0"
func removeChunkExtension(p []byte) ([]byte, error) {
	// Everything from the first ';' onward is extension data; drop it.
	// TODO: care about exact syntax of chunk extensions? We're
	// ignoring and stripping them anyway. For now just never
	// return an error.
	if i := bytes.Index(p, semi); i >= 0 {
		p = p[:i]
	}
	return p, nil
}
213 214 // NewChunkedWriter returns a new chunkedWriter that translates writes into HTTP
215 // "chunked" format before writing them to w. Closing the returned chunkedWriter
216 // sends the final 0-length chunk that marks the end of the stream but does
217 // not send the final CRLF that appears after trailers; trailers and the last
218 // CRLF must be written separately.
219 //
220 // NewChunkedWriter is not needed by normal applications. The http
221 // package adds chunking automatically if handlers don't set a
222 // Content-Length header. Using newChunkedWriter inside a handler
223 // would result in double chunking or chunking with a Content-Length
224 // length, both of which are wrong.
225 func NewChunkedWriter(w io.Writer) io.WriteCloser {
226 return &chunkedWriter{w}
227 }
// Writing to chunkedWriter translates to writing in HTTP chunked Transfer
// Encoding wire format to the underlying Wire chunkedWriter.
type chunkedWriter struct {
	Wire io.Writer // destination for the chunk-encoded bytes
}
234 235 // Write the contents of data as one chunk to Wire.
236 // NOTE: Note that the corresponding chunk-writing procedure in Conn.Write has
237 // a bug since it does not check for success of [io.WriteString]
238 func (cw *chunkedWriter) Write(data []byte) (n int, err error) {
239 240 // Don't send 0-length data. It looks like EOF for chunked encoding.
241 if len(data) == 0 {
242 return 0, nil
243 }
244 245 if _, err = fmt.Fprintf(cw.Wire, "%x\r\n", len(data)); err != nil {
246 return 0, err
247 }
248 if n, err = cw.Wire.Write(data); err != nil {
249 return
250 }
251 if n != len(data) {
252 err = io.ErrShortWrite
253 return
254 }
255 if _, err = io.WriteString(cw.Wire, "\r\n"); err != nil {
256 return
257 }
258 if bw, ok := cw.Wire.(*FlushAfterChunkWriter); ok {
259 err = bw.Flush()
260 }
261 return
262 }
263 264 func (cw *chunkedWriter) Close() error {
265 _, err := io.WriteString(cw.Wire, "0\r\n")
266 return err
267 }
// FlushAfterChunkWriter signals from the caller of [NewChunkedWriter]
// that each chunk should be followed by a flush. It is used by the
// [net/http.Transport] code to keep the buffering behavior for headers and
// trailers, but flush out chunks aggressively in the middle for
// request bodies which may be generated slowly. See Issue 6574.
type FlushAfterChunkWriter struct {
	// Embedded so chunkedWriter.Write can detect the wrapper and
	// call Flush after each complete chunk.
	*bufio.Writer
}
// parseHexUint parses v as a hexadecimal chunk length.
// It rejects empty input, non-hex bytes, and numbers longer than
// 16 hex digits (which would overflow uint64).
func parseHexUint(v []byte) (uint64, error) {
	if len(v) == 0 {
		return 0, errors.New("empty hex number for chunk length")
	}
	var n uint64
	for i := 0; i < len(v); i++ {
		c := v[i]
		var digit byte
		if c >= '0' && c <= '9' {
			digit = c - '0'
		} else if c >= 'a' && c <= 'f' {
			digit = c - 'a' + 10
		} else if c >= 'A' && c <= 'F' {
			digit = c - 'A' + 10
		} else {
			return 0, errors.New("invalid byte in chunk length")
		}
		// The digit is validated before the length cap, so a 17th byte
		// that is invalid reports the invalid-byte error, as before.
		if i == 16 {
			return 0, errors.New("http chunk length too large")
		}
		n = n<<4 | uint64(digit)
	}
	return n, nil
}
301