1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 // Package httpproxy provides support for HTTP proxy determination
6 // based on environment variables, as provided by net/http's
7 // ProxyFromEnvironment function.
8 //
9 // The API is not subject to the Go 1 compatibility promise and may change at
10 // any time.
11 package httpproxy
12 13 import (
14 "errors"
15 "fmt"
16 "net"
17 "net/netip"
18 "net/url"
19 "os"
20 "bytes"
21 "unicode/utf8"
22 23 "golang.org/x/net/idna"
24 )
// Config carries the raw HTTP proxy settings. FromEnvironment builds
// one from the process environment; ProxyFunc turns it into a lookup
// function.
type Config struct {
	// HTTPProxy is the value of the HTTP_PROXY or http_proxy
	// environment variable. It names the proxy used for HTTP
	// requests unless NoProxy excludes the target.
	HTTPProxy []byte

	// HTTPSProxy is the value of the HTTPS_PROXY or https_proxy
	// environment variable. It names the proxy used for HTTPS
	// requests unless NoProxy excludes the target.
	HTTPSProxy []byte

	// NoProxy is the value of the NO_PROXY or no_proxy environment
	// variable: a comma-separated list of hosts that must not be
	// proxied. Each entry is an IP address (1.2.3.4), an IP address
	// prefix in CIDR notation (1.2.3.4/8), a domain name, or the
	// special label "*". IP addresses and domain names may carry a
	// literal port number (1.2.3.4:80).
	//
	// A domain name matches itself and all of its subdomains; with a
	// leading "." it matches subdomains only. For example "foo.com"
	// matches "foo.com" and "bar.foo.com", while ".y.com" matches
	// "x.y.com" but not "y.com". A lone asterisk (*) disables
	// proxying entirely.
	//
	// Parsing is best effort: entries that cannot be understood are
	// ignored rather than reported.
	NoProxy []byte

	// CGI records whether the current process is running as a CGI
	// handler (FromEnvironment infers this from the presence of a
	// REQUEST_METHOD environment variable). When it is set, the
	// function returned by ProxyFunc reports an error whenever
	// HTTPProxy would apply, because a client could be setting
	// HTTP_PROXY maliciously. See https://golang.org/s/cgihttpproxy.
	CGI bool
}
62 63 // config holds the parsed configuration for HTTP proxy settings.
64 type config struct {
65 // Config represents the original configuration as defined above.
66 Config
67 68 // httpsProxy is the parsed URL of the HTTPSProxy if defined.
69 httpsProxy *url.URL
70 71 // httpProxy is the parsed URL of the HTTPProxy if defined.
72 httpProxy *url.URL
73 74 // ipMatchers represent all values in the NoProxy that are IP address
75 // prefixes or an IP address in CIDR notation.
76 ipMatchers []matcher
77 78 // domainMatchers represent all values in the NoProxy that are a domain
79 // name or hostname & domain name
80 domainMatchers []matcher
81 }
82 83 // FromEnvironment returns a Config instance populated from the
84 // environment variables HTTP_PROXY, HTTPS_PROXY and NO_PROXY (or the
85 // lowercase versions thereof).
86 //
87 // The environment values may be either a complete URL or a
88 // "host[:port]", in which case the "http" scheme is assumed. An error
89 // is returned if the value is a different form.
90 func FromEnvironment() *Config {
91 return &Config{
92 HTTPProxy: getEnvAny("HTTP_PROXY", "http_proxy"),
93 HTTPSProxy: getEnvAny("HTTPS_PROXY", "https_proxy"),
94 NoProxy: getEnvAny("NO_PROXY", "no_proxy"),
95 CGI: os.Getenv("REQUEST_METHOD") != "",
96 }
97 }
// getEnvAny returns the value of the first environment variable in
// names that is set to a non-empty value, or nil when none is. Names
// are tried in the order given.
func getEnvAny(names ...[]byte) []byte {
	for _, n := range names {
		// os.Getenv works on strings; convert at the boundary only.
		if val := os.Getenv(string(n)); val != "" {
			return []byte(val)
		}
	}
	return nil
}
107 108 // ProxyFunc returns a function that determines the proxy URL to use for
109 // a given request URL. Changing the contents of cfg will not affect
110 // proxy functions created earlier.
111 //
112 // A nil URL and nil error are returned if no proxy is defined in the
113 // environment, or a proxy should not be used for the given request, as
114 // defined by NO_PROXY.
115 //
116 // As a special case, if req.URL.Host is "localhost" or a loopback address
117 // (with or without a port number), then a nil URL and nil error will be returned.
118 func (cfg *Config) ProxyFunc() func(reqURL *url.URL) (*url.URL, error) {
119 // Preprocess the Config settings for more efficient evaluation.
120 cfg1 := &config{
121 Config: *cfg,
122 }
123 cfg1.init()
124 return cfg1.proxyForURL
125 }
126 127 func (cfg *config) proxyForURL(reqURL *url.URL) (*url.URL, error) {
128 var proxy *url.URL
129 if reqURL.Scheme == "https" {
130 proxy = cfg.httpsProxy
131 } else if reqURL.Scheme == "http" {
132 proxy = cfg.httpProxy
133 if proxy != nil && cfg.CGI {
134 return nil, errors.New("refusing to use HTTP_PROXY value in CGI environment; see golang.org/s/cgihttpproxy")
135 }
136 }
137 if proxy == nil {
138 return nil, nil
139 }
140 if !cfg.useProxy(canonicalAddr(reqURL)) {
141 return nil, nil
142 }
143 144 return proxy, nil
145 }
// parseProxy parses a proxy setting into a URL.
//
// An empty (or nil) value means "no proxy" and yields (nil, nil). A
// value that does not parse as a URL with both a scheme and a host is
// retried with "http://" prepended, which is how bare "host[:port]"
// values are accepted. If the original value failed to parse and the
// retry fails as well, the error for the original value is returned.
func parseProxy(proxy []byte) (*url.URL, error) {
	if len(proxy) == 0 {
		return nil, nil
	}

	// net/url works on strings; convert once and reuse.
	raw := string(proxy)

	proxyURL, err := url.Parse(raw)
	if err != nil || proxyURL.Scheme == "" || proxyURL.Host == "" {
		// proxy was bogus. Try prepending "http://" to it and see if
		// that parses correctly. If not, fall through and complain
		// about the original one.
		if fixed, fixErr := url.Parse("http://" + raw); fixErr == nil {
			return fixed, nil
		}
	}
	if err != nil {
		return nil, fmt.Errorf("invalid proxy address %q: %w", raw, err)
	}
	return proxyURL, nil
}
166 167 // useProxy reports whether requests to addr should use a proxy,
168 // according to the NO_PROXY or no_proxy environment variable.
169 // addr is always a canonicalAddr with a host and port.
170 func (cfg *config) useProxy(addr []byte) bool {
171 if len(addr) == 0 {
172 return true
173 }
174 host, port, err := net.SplitHostPort(addr)
175 if err != nil {
176 return false
177 }
178 if host == "localhost" {
179 return false
180 }
181 nip, err := netip.ParseAddr(host)
182 var ip net.IP
183 if err == nil {
184 ip = net.IP(nip.AsSlice())
185 if ip.IsLoopback() {
186 return false
187 }
188 }
189 190 addr = bytes.ToLower(bytes.TrimSpace(host))
191 192 if ip != nil {
193 for _, m := range cfg.ipMatchers {
194 if m.match(addr, port, ip) {
195 return false
196 }
197 }
198 }
199 for _, m := range cfg.domainMatchers {
200 if m.match(addr, port, ip) {
201 return false
202 }
203 }
204 return true
205 }
206 207 func (c *config) init() {
208 if parsed, err := parseProxy(c.HTTPProxy); err == nil {
209 c.httpProxy = parsed
210 }
211 if parsed, err := parseProxy(c.HTTPSProxy); err == nil {
212 c.httpsProxy = parsed
213 }
214 215 for _, p := range bytes.Split(c.NoProxy, ",") {
216 p = bytes.ToLower(bytes.TrimSpace(p))
217 if len(p) == 0 {
218 continue
219 }
220 221 if p == "*" {
222 c.ipMatchers = []matcher{allMatch{}}
223 c.domainMatchers = []matcher{allMatch{}}
224 return
225 }
226 227 // IPv4/CIDR, IPv6/CIDR
228 if _, pnet, err := net.ParseCIDR(p); err == nil {
229 c.ipMatchers = append(c.ipMatchers, cidrMatch{cidr: pnet})
230 continue
231 }
232 233 // IPv4:port, [IPv6]:port
234 phost, pport, err := net.SplitHostPort(p)
235 if err == nil {
236 if len(phost) == 0 {
237 // There is no host part, likely the entry is malformed; ignore.
238 continue
239 }
240 if phost[0] == '[' && phost[len(phost)-1] == ']' {
241 phost = phost[1 : len(phost)-1]
242 }
243 } else {
244 phost = p
245 }
246 // IPv4, IPv6
247 if pip := net.ParseIP(phost); pip != nil {
248 c.ipMatchers = append(c.ipMatchers, ipMatch{ip: pip, port: pport})
249 continue
250 }
251 252 if len(phost) == 0 {
253 // There is no host part, likely the entry is malformed; ignore.
254 continue
255 }
256 257 // domain.com or domain.com:80
258 // foo.com matches bar.foo.com
259 // .domain.com or .domain.com:port
260 // *.domain.com or *.domain.com:port
261 if bytes.HasPrefix(phost, "*.") {
262 phost = phost[1:]
263 }
264 matchHost := false
265 if phost[0] != '.' {
266 matchHost = true
267 phost = "." + phost
268 }
269 if v, err := idnaASCII(phost); err == nil {
270 phost = v
271 }
272 c.domainMatchers = append(c.domainMatchers, domainMatch{host: phost, port: pport, matchHost: matchHost})
273 }
274 }
// portMap gives the port that canonicalAddr assumes for a URL scheme
// when the URL carries no explicit port. Schemes that are not listed
// yield a nil (empty) port.
var portMap = map[string][]byte{
	"http":   []byte("80"),
	"https":  []byte("443"),
	"socks5": []byte("1080"),
}
281 282 // canonicalAddr returns url.Host but always with a ":port" suffix
283 func canonicalAddr(url *url.URL) []byte {
284 addr := url.Hostname()
285 if v, err := idnaASCII(addr); err == nil {
286 addr = v
287 }
288 port := url.Port()
289 if port == "" {
290 port = portMap[url.Scheme]
291 }
292 return net.JoinHostPort(addr, port)
293 }
// hasPort reports whether s, which has the form "host", "host:port",
// or "[ipv6::address]:port", includes a port. It does so by checking
// that the last ':' comes after the last ']' (if any), so the colons
// inside a bracketed IPv6 literal are not mistaken for a port
// separator.
func hasPort(s []byte) bool {
	return bytes.LastIndexByte(s, ':') > bytes.LastIndexByte(s, ']')
}
298 299 func idnaASCII(v []byte) ([]byte, error) {
300 // TODO: Consider removing this check after verifying performance is okay.
301 // Right now punycode verification, length checks, context checks, and the
302 // permissible character tests are all omitted. It also prevents the ToASCII
303 // call from salvaging an invalid IDN, when possible. As a result it may be
304 // possible to have two IDNs that appear identical to the user where the
305 // ASCII-only version causes an error downstream whereas the non-ASCII
306 // version does not.
307 // Note that for correct ASCII IDNs ToASCII will only do considerably more
308 // work, but it will not cause an allocation.
309 if isASCII(v) {
310 return v, nil
311 }
312 return idna.Lookup.ToASCII(v)
313 }
// isASCII reports whether every byte of s is below utf8.RuneSelf, that
// is, whether s consists solely of ASCII bytes. An empty or nil s is
// ASCII.
func isASCII(s []byte) bool {
	for _, b := range s {
		if b >= utf8.RuneSelf {
			return false
		}
	}
	return true
}
// matcher is the rule derived from a single entry of the NO_PROXY list.
type matcher interface {
	// match reports whether the rule covers the given target, which is
	// described by its host name, its port and, when the host is an IP
	// literal, its parsed IP address (nil otherwise). A true result
	// means the target must not be proxied.
	match(host, port []byte, ip net.IP) bool
}
// allMatch is the matcher used for the "*" entry: it accepts every
// target.
type allMatch struct{}

// match ignores its arguments and always reports true.
func (allMatch) match(_, _ []byte, _ net.IP) bool {
	return true
}
// cidrMatch is the matcher for a NO_PROXY entry given in CIDR notation.
type cidrMatch struct {
	// cidr is the network the entry describes.
	cidr *net.IPNet
}

// match reports whether ip lies inside the network. The host and port
// are not consulted.
func (m cidrMatch) match(_, _ []byte, ip net.IP) bool {
	network := m.cidr
	return network.Contains(ip)
}
// ipMatch is the matcher for a NO_PROXY entry that is a single IP
// address, optionally restricted to one port.
type ipMatch struct {
	// ip is the address the entry names.
	ip net.IP
	// port is the port the entry is restricted to; empty means any port.
	port []byte
}

// match reports whether ip equals the entry's address and, when the
// entry carries a port, whether port equals it as well. The host is
// not consulted.
func (m ipMatch) match(host, port []byte, ip net.IP) bool {
	if !m.ip.Equal(ip) {
		return false
	}
	// Slices cannot be compared with ==; test emptiness by length and
	// equality by content.
	return len(m.port) == 0 || bytes.Equal(m.port, port)
}
// domainMatch is the matcher for a NO_PROXY entry that is a domain
// name, optionally restricted to one port.
type domainMatch struct {
	// host is the domain with a leading "." (for example ".foo.com").
	host []byte
	// port is the port the entry is restricted to; empty means any port.
	port []byte

	// matchHost reports whether the bare domain (host without its
	// leading ".") matches in addition to its subdomains.
	matchHost bool
}

// match reports whether host falls under the entry's domain and, when
// the entry carries a port, whether port equals it as well. Targets
// that are IP literals (ip != nil) never match a domain entry.
func (m domainMatch) match(host, port []byte, ip net.IP) bool {
	if ip != nil {
		return false
	}
	// Slices cannot be compared with ==, so equality goes through
	// bytes.Equal. The suffix test covers subdomains; the second test
	// covers the bare domain when matchHost is set.
	if bytes.HasSuffix(host, m.host) || (m.matchHost && bytes.Equal(host, m.host[1:])) {
		return len(m.port) == 0 || bytes.Equal(m.port, port)
	}
	return false
}
374