httplex.mx raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package httpguts
   6  
   7  import (
   8  	"net"
   9  	"bytes"
  10  	"unicode/utf8"
  11  
  12  	"golang.org/x/net/idna"
  13  )
  14  
  15  var isTokenTable = [256]bool{
  16  	'!':  true,
  17  	'#':  true,
  18  	'$':  true,
  19  	'%':  true,
  20  	'&':  true,
  21  	'\'': true,
  22  	'*':  true,
  23  	'+':  true,
  24  	'-':  true,
  25  	'.':  true,
  26  	'0':  true,
  27  	'1':  true,
  28  	'2':  true,
  29  	'3':  true,
  30  	'4':  true,
  31  	'5':  true,
  32  	'6':  true,
  33  	'7':  true,
  34  	'8':  true,
  35  	'9':  true,
  36  	'A':  true,
  37  	'B':  true,
  38  	'C':  true,
  39  	'D':  true,
  40  	'E':  true,
  41  	'F':  true,
  42  	'G':  true,
  43  	'H':  true,
  44  	'I':  true,
  45  	'J':  true,
  46  	'K':  true,
  47  	'L':  true,
  48  	'M':  true,
  49  	'N':  true,
  50  	'O':  true,
  51  	'P':  true,
  52  	'Q':  true,
  53  	'R':  true,
  54  	'S':  true,
  55  	'T':  true,
  56  	'U':  true,
  57  	'W':  true,
  58  	'V':  true,
  59  	'X':  true,
  60  	'Y':  true,
  61  	'Z':  true,
  62  	'^':  true,
  63  	'_':  true,
  64  	'`':  true,
  65  	'a':  true,
  66  	'b':  true,
  67  	'c':  true,
  68  	'd':  true,
  69  	'e':  true,
  70  	'f':  true,
  71  	'g':  true,
  72  	'h':  true,
  73  	'i':  true,
  74  	'j':  true,
  75  	'k':  true,
  76  	'l':  true,
  77  	'm':  true,
  78  	'n':  true,
  79  	'o':  true,
  80  	'p':  true,
  81  	'q':  true,
  82  	'r':  true,
  83  	's':  true,
  84  	't':  true,
  85  	'u':  true,
  86  	'v':  true,
  87  	'w':  true,
  88  	'x':  true,
  89  	'y':  true,
  90  	'z':  true,
  91  	'|':  true,
  92  	'~':  true,
  93  }
  94  
  95  func IsTokenRune(r rune) bool {
  96  	return r < utf8.RuneSelf && isTokenTable[byte(r)]
  97  }
  98  
  99  // HeaderValuesContainsToken reports whether any string in values
 100  // contains the provided token, ASCII case-insensitively.
 101  func HeaderValuesContainsToken(values [][]byte, token []byte) bool {
 102  	for _, v := range values {
 103  		if headerValueContainsToken(v, token) {
 104  			return true
 105  		}
 106  	}
 107  	return false
 108  }
 109  
 110  // isOWS reports whether b is an optional whitespace byte, as defined
 111  // by RFC 7230 section 3.2.3.
 112  func isOWS(b byte) bool { return b == ' ' || b == '\t' }
 113  
 114  // trimOWS returns x with all optional whitespace removes from the
 115  // beginning and end.
 116  func trimOWS(x []byte) []byte {
 117  	// TODO: consider using bytes.Trim(x, " \t") instead,
 118  	// if and when it's fast enough. See issue 10292.
 119  	// But this ASCII-only code will probably always beat UTF-8
 120  	// aware code.
 121  	for len(x) > 0 && isOWS(x[0]) {
 122  		x = x[1:]
 123  	}
 124  	for len(x) > 0 && isOWS(x[len(x)-1]) {
 125  		x = x[:len(x)-1]
 126  	}
 127  	return x
 128  }
 129  
 130  // headerValueContainsToken reports whether v (assumed to be a
 131  // 0#element, in the ABNF extension described in RFC 7230 section 7)
 132  // contains token amongst its comma-separated tokens, ASCII
 133  // case-insensitively.
 134  func headerValueContainsToken(v []byte, token []byte) bool {
 135  	for comma := bytes.IndexByte(v, ','); comma != -1; comma = bytes.IndexByte(v, ',') {
 136  		if tokenEqual(trimOWS(v[:comma]), token) {
 137  			return true
 138  		}
 139  		v = v[comma+1:]
 140  	}
 141  	return tokenEqual(trimOWS(v), token)
 142  }
 143  
 144  // lowerASCII returns the ASCII lowercase version of b.
 145  func lowerASCII(b byte) byte {
 146  	if 'A' <= b && b <= 'Z' {
 147  		return b + ('a' - 'A')
 148  	}
 149  	return b
 150  }
 151  
 152  // tokenEqual reports whether t1 and t2 are equal, ASCII case-insensitively.
 153  func tokenEqual(t1, t2 []byte) bool {
 154  	if len(t1) != len(t2) {
 155  		return false
 156  	}
 157  	for i, b := range t1 {
 158  		if b >= utf8.RuneSelf {
 159  			// No UTF-8 or non-ASCII allowed in tokens.
 160  			return false
 161  		}
 162  		if lowerASCII(byte(b)) != lowerASCII(t2[i]) {
 163  			return false
 164  		}
 165  	}
 166  	return true
 167  }
 168  
 169  // isLWS reports whether b is linear white space, according
 170  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
 171  //
 172  //	LWS            = [CRLF] 1*( SP | HT )
 173  func isLWS(b byte) bool { return b == ' ' || b == '\t' }
 174  
 175  // isCTL reports whether b is a control byte, according
 176  // to http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2
 177  //
 178  //	CTL            = <any US-ASCII control character
 179  //	                 (octets 0 - 31) and DEL (127)>
 180  func isCTL(b byte) bool {
 181  	const del = 0x7f // a CTL
 182  	return b < ' ' || b == del
 183  }
 184  
 185  // ValidHeaderFieldName reports whether v is a valid HTTP/1.x header name.
 186  // HTTP/2 imposes the additional restriction that uppercase ASCII
 187  // letters are not allowed.
 188  //
 189  // RFC 7230 says:
 190  //
 191  //	header-field   = field-name ":" OWS field-value OWS
 192  //	field-name     = token
 193  //	token          = 1*tchar
 194  //	tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
 195  //	        "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
 196  func ValidHeaderFieldName(v []byte) bool {
 197  	if len(v) == 0 {
 198  		return false
 199  	}
 200  	for i := 0; i < len(v); i++ {
 201  		if !isTokenTable[v[i]] {
 202  			return false
 203  		}
 204  	}
 205  	return true
 206  }
 207  
 208  // ValidHostHeader reports whether h is a valid host header.
 209  func ValidHostHeader(h []byte) bool {
 210  	// The latest spec is actually this:
 211  	//
 212  	// http://tools.ietf.org/html/rfc7230#section-5.4
 213  	//     Host = uri-host [ ":" port ]
 214  	//
 215  	// Where uri-host is:
 216  	//     http://tools.ietf.org/html/rfc3986#section-3.2.2
 217  	//
 218  	// But we're going to be much more lenient for now and just
 219  	// search for any byte that's not a valid byte in any of those
 220  	// expressions.
 221  	for i := 0; i < len(h); i++ {
 222  		if !validHostByte[h[i]] {
 223  			return false
 224  		}
 225  	}
 226  	return true
 227  }
 228  
 229  // See the validHostHeader comment.
 230  var validHostByte = [256]bool{
 231  	'0': true, '1': true, '2': true, '3': true, '4': true, '5': true, '6': true, '7': true,
 232  	'8': true, '9': true,
 233  
 234  	'a': true, 'b': true, 'c': true, 'd': true, 'e': true, 'f': true, 'g': true, 'h': true,
 235  	'i': true, 'j': true, 'k': true, 'l': true, 'm': true, 'n': true, 'o': true, 'p': true,
 236  	'q': true, 'r': true, 's': true, 't': true, 'u': true, 'v': true, 'w': true, 'x': true,
 237  	'y': true, 'z': true,
 238  
 239  	'A': true, 'B': true, 'C': true, 'D': true, 'E': true, 'F': true, 'G': true, 'H': true,
 240  	'I': true, 'J': true, 'K': true, 'L': true, 'M': true, 'N': true, 'O': true, 'P': true,
 241  	'Q': true, 'R': true, 'S': true, 'T': true, 'U': true, 'V': true, 'W': true, 'X': true,
 242  	'Y': true, 'Z': true,
 243  
 244  	'!':  true, // sub-delims
 245  	'$':  true, // sub-delims
 246  	'%':  true, // pct-encoded (and used in IPv6 zones)
 247  	'&':  true, // sub-delims
 248  	'(':  true, // sub-delims
 249  	')':  true, // sub-delims
 250  	'*':  true, // sub-delims
 251  	'+':  true, // sub-delims
 252  	',':  true, // sub-delims
 253  	'-':  true, // unreserved
 254  	'.':  true, // unreserved
 255  	':':  true, // IPv6address + Host expression's optional port
 256  	';':  true, // sub-delims
 257  	'=':  true, // sub-delims
 258  	'[':  true,
 259  	'\'': true, // sub-delims
 260  	']':  true,
 261  	'_':  true, // unreserved
 262  	'~':  true, // unreserved
 263  }
 264  
 265  // ValidHeaderFieldValue reports whether v is a valid "field-value" according to
 266  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2 :
 267  //
 268  //	message-header = field-name ":" [ field-value ]
 269  //	field-value    = *( field-content | LWS )
 270  //	field-content  = <the OCTETs making up the field-value
 271  //	                 and consisting of either *TEXT or combinations
 272  //	                 of token, separators, and quoted-string>
 273  //
 274  // http://www.w3.org/Protocols/rfc2616/rfc2616-sec2.html#sec2.2 :
 275  //
 276  //	TEXT           = <any OCTET except CTLs,
 277  //	                  but including LWS>
 278  //	LWS            = [CRLF] 1*( SP | HT )
 279  //	CTL            = <any US-ASCII control character
 280  //	                 (octets 0 - 31) and DEL (127)>
 281  //
 282  // RFC 7230 says:
 283  //
 284  //	field-value    = *( field-content / obs-fold )
 285  //	obj-fold       =  N/A to http2, and deprecated
 286  //	field-content  = field-vchar [ 1*( SP / HTAB ) field-vchar ]
 287  //	field-vchar    = VCHAR / obs-text
 288  //	obs-text       = %x80-FF
 289  //	VCHAR          = "any visible [USASCII] character"
 290  //
 291  // http2 further says: "Similarly, HTTP/2 allows header field values
 292  // that are not valid. While most of the values that can be encoded
 293  // will not alter header field parsing, carriage return (CR, ASCII
 294  // 0xd), line feed (LF, ASCII 0xa), and the zero character (NUL, ASCII
 295  // 0x0) might be exploited by an attacker if they are translated
 296  // verbatim. Any request or response that contains a character not
 297  // permitted in a header field value MUST be treated as malformed
 298  // (Section 8.1.2.6). Valid characters are defined by the
 299  // field-content ABNF rule in Section 3.2 of [RFC7230]."
 300  //
 301  // This function does not (yet?) properly handle the rejection of
 302  // strings that begin or end with SP or HTAB.
 303  func ValidHeaderFieldValue(v []byte) bool {
 304  	for i := 0; i < len(v); i++ {
 305  		b := v[i]
 306  		if isCTL(b) && !isLWS(b) {
 307  			return false
 308  		}
 309  	}
 310  	return true
 311  }
 312  
 313  func isASCII(s []byte) bool {
 314  	for i := 0; i < len(s); i++ {
 315  		if s[i] >= utf8.RuneSelf {
 316  			return false
 317  		}
 318  	}
 319  	return true
 320  }
 321  
 322  // PunycodeHostPort returns the IDNA Punycode version
 323  // of the provided "host" or "host:port" string.
 324  func PunycodeHostPort(v []byte) ([]byte, error) {
 325  	if isASCII(v) {
 326  		return v, nil
 327  	}
 328  
 329  	host, port, err := net.SplitHostPort(v)
 330  	if err != nil {
 331  		// The input 'v' argument was just a "host" argument,
 332  		// without a port. This error should not be returned
 333  		// to the caller.
 334  		host = v
 335  		port = ""
 336  	}
 337  	host, err = idna.ToASCII(host)
 338  	if err != nil {
 339  		// Non-UTF-8? Not representable in Punycode, in any
 340  		// case.
 341  		return "", err
 342  	}
 343  	if port == "" {
 344  		return host, nil
 345  	}
 346  	return net.JoinHostPort(host, port), nil
 347  }
 348