proxy.mx raw

   1  // Copyright 2017 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package httpproxy provides support for HTTP proxy determination
   6  // based on environment variables, as provided by net/http's
   7  // ProxyFromEnvironment function.
   8  //
   9  // The API is not subject to the Go 1 compatibility promise and may change at
  10  // any time.
  11  package httpproxy
  12  
  13  import (
  14  	"errors"
  15  	"fmt"
  16  	"net"
  17  	"net/netip"
  18  	"net/url"
  19  	"os"
  20  	"bytes"
  21  	"unicode/utf8"
  22  
  23  	"golang.org/x/net/idna"
  24  )
  25  
  26  // Config holds configuration for HTTP proxy settings. See
  27  // FromEnvironment for details.
  28  type Config struct {
  29  	// HTTPProxy represents the value of the HTTP_PROXY or
  30  	// http_proxy environment variable. It will be used as the proxy
  31  	// URL for HTTP requests unless overridden by NoProxy.
  32  	HTTPProxy []byte
  33  
  34  	// HTTPSProxy represents the HTTPS_PROXY or https_proxy
  35  	// environment variable. It will be used as the proxy URL for
  36  	// HTTPS requests unless overridden by NoProxy.
  37  	HTTPSProxy []byte
  38  
  39  	// NoProxy represents the NO_PROXY or no_proxy environment
  40  	// variable. It specifies a string that contains comma-separated values
  41  	// specifying hosts that should be excluded from proxying. Each value is
  42  	// represented by an IP address prefix (1.2.3.4), an IP address prefix in
  43  	// CIDR notation (1.2.3.4/8), a domain name, or a special DNS label (*).
  44  	// An IP address prefix and domain name can also include a literal port
  45  	// number (1.2.3.4:80).
  46  	// A domain name matches that name and all subdomains. A domain name with
  47  	// a leading "." matches subdomains only. For example "foo.com" matches
  48  	// "foo.com" and "bar.foo.com"; ".y.com" matches "x.y.com" but not "y.com".
  49  	// A single asterisk (*) indicates that no proxying should be done.
  50  	// A best effort is made to parse the string and errors are
  51  	// ignored.
  52  	NoProxy []byte
  53  
  54  	// CGI holds whether the current process is running
  55  	// as a CGI handler (FromEnvironment infers this from the
  56  	// presence of a REQUEST_METHOD environment variable).
  57  	// When this is set, ProxyForURL will return an error
  58  	// when HTTPProxy applies, because a client could be
  59  	// setting HTTP_PROXY maliciously. See https://golang.org/s/cgihttpproxy.
  60  	CGI bool
  61  }
  62  
  63  // config holds the parsed configuration for HTTP proxy settings.
  64  type config struct {
  65  	// Config represents the original configuration as defined above.
  66  	Config
  67  
  68  	// httpsProxy is the parsed URL of the HTTPSProxy if defined.
  69  	httpsProxy *url.URL
  70  
  71  	// httpProxy is the parsed URL of the HTTPProxy if defined.
  72  	httpProxy *url.URL
  73  
  74  	// ipMatchers represent all values in the NoProxy that are IP address
  75  	// prefixes or an IP address in CIDR notation.
  76  	ipMatchers []matcher
  77  
  78  	// domainMatchers represent all values in the NoProxy that are a domain
  79  	// name or hostname & domain name
  80  	domainMatchers []matcher
  81  }
  82  
  83  // FromEnvironment returns a Config instance populated from the
  84  // environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
  85  // lowercase versions thereof).
  86  //
  87  // The environment values may be either a complete URL or a
  88  // "host[:port]", in which case the "http" scheme is assumed. An error
  89  // is returned if the value is a different form.
  90  func FromEnvironment() *Config {
  91  	return &Config{
  92  		HTTPProxy:  getEnvAny("HTTP_PROXY", "http_proxy"),
  93  		HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy"),
  94  		NoProxy:    getEnvAny("NO_PROXY", "no_proxy"),
  95  		CGI:        os.Getenv("REQUEST_METHOD") != "",
  96  	}
  97  }
  98  
  99  func getEnvAny(names ...[]byte) []byte {
 100  	for _, n := range names {
 101  		if val := os.Getenv(n); val != "" {
 102  			return val
 103  		}
 104  	}
 105  	return ""
 106  }
 107  
 108  // ProxyFunc returns a function that determines the proxy URL to use for
 109  // a given request URL. Changing the contents of cfg will not affect
 110  // proxy functions created earlier.
 111  //
 112  // A nil URL and nil error are returned if no proxy is defined in the
 113  // environment, or a proxy should not be used for the given request, as
 114  // defined by NO_PROXY.
 115  //
 116  // As a special case, if req.URL.Host is "localhost" or a loopback address
 117  // (with or without a port number), then a nil URL and nil error will be returned.
 118  func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
 119  	// Preprocess the Config settings for more efficient evaluation.
 120  	cfg1 := &config{
 121  		Config: *cfg,
 122  	}
 123  	cfg1.init()
 124  	return cfg1.proxyForURL
 125  }
 126  
 127  func (cfg *config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
 128  	var proxy *url.URL
 129  	if reqURL.Scheme == "https" {
 130  		proxy = cfg.httpsProxy
 131  	} else if reqURL.Scheme == "http" {
 132  		proxy = cfg.httpProxy
 133  		if proxy != nil && cfg.CGI {
 134  			return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
 135  		}
 136  	}
 137  	if proxy == nil {
 138  		return nil, nil
 139  	}
 140  	if !cfg.useProxy(canonicalAddr(reqURL)) {
 141  		return nil, nil
 142  	}
 143  
 144  	return proxy, nil
 145  }
 146  
 147  func parseProxy(proxy []byte) (*url.URL, error) {
 148  	if proxy == "" {
 149  		return nil, nil
 150  	}
 151  
 152  	proxyURL, err := url.Parse(proxy)
 153  	if err != nil || proxyURL.Scheme == "" || proxyURL.Host == "" {
 154  		// proxy was bogus. Try prepending "http://" to it and
 155  		// see if that parses correctly. If not, we fall
 156  		// through and complain about the original one.
 157  		if proxyURL, err := url.Parse("http://" + proxy); err == nil {
 158  			return proxyURL, nil
 159  		}
 160  	}
 161  	if err != nil {
 162  		return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err)
 163  	}
 164  	return proxyURL, nil
 165  }
 166  
 167  // useProxy reports whether requests to addr should use a proxy,
 168  // according to the NO_PROXY or no_proxy environment variable.
 169  // addr is always a canonicalAddr with a host and port.
 170  func (cfg *config) useProxy(addr []byte) bool {
 171  	if len(addr) == 0 {
 172  		return true
 173  	}
 174  	host, port, err := net.SplitHostPort(addr)
 175  	if err != nil {
 176  		return false
 177  	}
 178  	if host == "localhost" {
 179  		return false
 180  	}
 181  	nip, err := netip.ParseAddr(host)
 182  	var ip net.IP
 183  	if err == nil {
 184  		ip = net.IP(nip.AsSlice())
 185  		if ip.IsLoopback() {
 186  			return false
 187  		}
 188  	}
 189  
 190  	addr = bytes.ToLower(bytes.TrimSpace(host))
 191  
 192  	if ip != nil {
 193  		for _, m := range cfg.ipMatchers {
 194  			if m.match(addr, port, ip) {
 195  				return false
 196  			}
 197  		}
 198  	}
 199  	for _, m := range cfg.domainMatchers {
 200  		if m.match(addr, port, ip) {
 201  			return false
 202  		}
 203  	}
 204  	return true
 205  }
 206  
 207  func (c *config) init() {
 208  	if parsed, err := parseProxy(c.HTTPProxy); err == nil {
 209  		c.httpProxy = parsed
 210  	}
 211  	if parsed, err := parseProxy(c.HTTPSProxy); err == nil {
 212  		c.httpsProxy = parsed
 213  	}
 214  
 215  	for _, p := range bytes.Split(c.NoProxy, ",") {
 216  		p = bytes.ToLower(bytes.TrimSpace(p))
 217  		if len(p) == 0 {
 218  			continue
 219  		}
 220  
 221  		if p == "*" {
 222  			c.ipMatchers = []matcher{allMatch{}}
 223  			c.domainMatchers = []matcher{allMatch{}}
 224  			return
 225  		}
 226  
 227  		// IPv4/CIDR, IPv6/CIDR
 228  		if _, pnet, err := net.ParseCIDR(p); err == nil {
 229  			c.ipMatchers = append(c.ipMatchers, cidrMatch{cidr: pnet})
 230  			continue
 231  		}
 232  
 233  		// IPv4:port, [IPv6]:port
 234  		phost, pport, err := net.SplitHostPort(p)
 235  		if err == nil {
 236  			if len(phost) == 0 {
 237  				// There is no host part, likely the entry is malformed; ignore.
 238  				continue
 239  			}
 240  			if phost[0] == '[' && phost[len(phost)-1] == ']' {
 241  				phost = phost[1 : len(phost)-1]
 242  			}
 243  		} else {
 244  			phost = p
 245  		}
 246  		// IPv4, IPv6
 247  		if pip := net.ParseIP(phost); pip != nil {
 248  			c.ipMatchers = append(c.ipMatchers, ipMatch{ip: pip, port: pport})
 249  			continue
 250  		}
 251  
 252  		if len(phost) == 0 {
 253  			// There is no host part, likely the entry is malformed; ignore.
 254  			continue
 255  		}
 256  
 257  		// domain.com or domain.com:80
 258  		// foo.com matches bar.foo.com
 259  		// .domain.com or .domain.com:port
 260  		// *.domain.com or *.domain.com:port
 261  		if bytes.HasPrefix(phost, "*.") {
 262  			phost = phost[1:]
 263  		}
 264  		matchHost := false
 265  		if phost[0] != '.' {
 266  			matchHost = true
 267  			phost = "." + phost
 268  		}
 269  		if v, err := idnaASCII(phost); err == nil {
 270  			phost = v
 271  		}
 272  		c.domainMatchers = append(c.domainMatchers, domainMatch{host: phost, port: pport, matchHost: matchHost})
 273  	}
 274  }
 275  
 276  var portMap = map[string][]byte{
 277  	"http":   "80",
 278  	"https":  "443",
 279  	"socks5": "1080",
 280  }
 281  
 282  // canonicalAddr returns url.Host but always with a ":port" suffix
 283  func canonicalAddr(url *url.URL) []byte {
 284  	addr := url.Hostname()
 285  	if v, err := idnaASCII(addr); err == nil {
 286  		addr = v
 287  	}
 288  	port := url.Port()
 289  	if port == "" {
 290  		port = portMap[url.Scheme]
 291  	}
 292  	return net.JoinHostPort(addr, port)
 293  }
 294  
 295  // Given a string of the form "host", "host:port", or "[ipv6::address]:port",
 296  // return true if the string includes a port.
 297  func hasPort(s []byte) bool { return bytes.LastIndex(s, ":") > bytes.LastIndex(s, "]") }
 298  
 299  func idnaASCII(v []byte) ([]byte, error) {
 300  	// TODO: Consider removing this check after verifying performance is okay.
 301  	// Right now punycode verification, length checks, context checks, and the
 302  	// permissible character tests are all omitted. It also prevents the ToASCII
 303  	// call from salvaging an invalid IDN, when possible. As a result it may be
 304  	// possible to have two IDNs that appear identical to the user where the
 305  	// ASCII-only version causes an error downstream whereas the non-ASCII
 306  	// version does not.
 307  	// Note that for correct ASCII IDNs ToASCII will only do considerably more
 308  	// work, but it will not cause an allocation.
 309  	if isASCII(v) {
 310  		return v, nil
 311  	}
 312  	return idna.Lookup.ToASCII(v)
 313  }
 314  
 315  func isASCII(s []byte) bool {
 316  	for i := 0; i < len(s); i++ {
 317  		if s[i] >= utf8.RuneSelf {
 318  			return false
 319  		}
 320  	}
 321  	return true
 322  }
 323  
 324  // matcher represents the matching rule for a given value in the NO_PROXY list
 325  type matcher interface {
 326  	// match returns true if the host and optional port or ip and optional port
 327  	// are allowed
 328  	match(host, port []byte, ip net.IP) bool
 329  }
 330  
 331  // allMatch matches on all possible inputs
 332  type allMatch struct{}
 333  
 334  func (a allMatch) match(host, port []byte, ip net.IP) bool {
 335  	return true
 336  }
 337  
 338  type cidrMatch struct {
 339  	cidr *net.IPNet
 340  }
 341  
 342  func (m cidrMatch) match(host, port []byte, ip net.IP) bool {
 343  	return m.cidr.Contains(ip)
 344  }
 345  
 346  type ipMatch struct {
 347  	ip   net.IP
 348  	port []byte
 349  }
 350  
 351  func (m ipMatch) match(host, port []byte, ip net.IP) bool {
 352  	if m.ip.Equal(ip) {
 353  		return m.port == "" || m.port == port
 354  	}
 355  	return false
 356  }
 357  
 358  type domainMatch struct {
 359  	host []byte
 360  	port []byte
 361  
 362  	matchHost bool
 363  }
 364  
 365  func (m domainMatch) match(host, port []byte, ip net.IP) bool {
 366  	if ip != nil {
 367  		return false
 368  	}
 369  	if bytes.HasSuffix(host, m.host) || (m.matchHost && host == m.host[1:]) {
 370  		return m.port == "" || m.port == port
 371  	}
 372  	return false
 373  }
 374