1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:generate go run gen.go
6 7 // Package publicsuffix provides a public suffix list based on data from
8 // https://publicsuffix.org/
9 //
10 // A public suffix is one under which Internet users can directly register
11 // names. It is related to, but different from, a TLD (top level domain).
12 //
13 // "com" is a TLD (top level domain). Top level means it has no dots.
14 //
15 // "com" is also a public suffix. Amazon and Google have registered different
16 // siblings under that domain: "amazon.com" and "google.com".
17 //
18 // "au" is another TLD, again because it has no dots. But it's not "amazon.au".
19 // Instead, it's "amazon.com.au".
20 //
21 // "com.au" isn't an actual TLD, because it's not at the top level (it has
22 // dots). But it is an eTLD (effective TLD), because that's the branching point
23 // for domain name registrars.
24 //
25 // Another name for "an eTLD" is "a public suffix". Often, what's more of
26 // interest is the eTLD+1, or one more label than the public suffix. For
27 // example, browsers partition read/write access to HTTP cookies according to
28 // the eTLD+1. Web pages served from "amazon.com.au" can't read cookies from
29 // "google.com.au", but web pages served from "maps.google.com" can share
30 // cookies from "www.google.com", so you don't have to sign into Google Maps
31 // separately from signing into Google Web Search. Note that all four of those
32 // domains have 3 labels and 2 dots. The first two domains are each an eTLD+1,
33 // the last two are not (but share the same eTLD+1: "google.com").
34 //
35 // All of these domains have the same eTLD+1:
36 // - "www.books.amazon.co.uk"
37 // - "books.amazon.co.uk"
38 // - "amazon.co.uk"
39 //
40 // Specifically, the eTLD+1 is "amazon.co.uk", because the eTLD is "co.uk".
41 //
42 // There is no closed form algorithm to calculate the eTLD of a domain.
43 // Instead, the calculation is data driven. This package provides a
44 // pre-compiled snapshot of Mozilla's PSL (Public Suffix List) data at
45 // https://publicsuffix.org/
46 package publicsuffix // import "golang.org/x/net/publicsuffix"
47 48 // TODO: specify case sensitivity and leading/trailing dot behavior for
49 // func PublicSuffix and func EffectiveTLDPlusOne.
50 51 import (
52 "fmt"
53 "net/http/cookiejar"
54 "net/netip"
55 "strings"
56 )
57 58 // List implements the cookiejar.PublicSuffixList interface by calling the
59 // PublicSuffix function.
60 var List cookiejar.PublicSuffixList = list{}
61 62 type list struct{}
63 64 func (list) PublicSuffix(domain string) string {
65 ps, _ := PublicSuffix(domain)
66 return ps
67 }
68 69 func (list) String() string {
70 return version
71 }
72 73 // PublicSuffix returns the public suffix of the domain using a copy of the
74 // publicsuffix.org database compiled into the library.
75 //
76 // icann is whether the public suffix is managed by the Internet Corporation
77 // for Assigned Names and Numbers. If not, the public suffix is either a
78 // privately managed domain (and in practice, not a top level domain) or an
79 // unmanaged top level domain (and not explicitly mentioned in the
80 // publicsuffix.org list). For example, "foo.org" and "foo.co.uk" are ICANN
81 // domains, "foo.dyndns.org" is a private domain and
82 // "cromulent" is an unmanaged top level domain.
83 //
84 // Use cases for distinguishing ICANN domains like "foo.com" from private
85 // domains like "foo.appspot.com" can be found at
86 // https://wiki.mozilla.org/Public_Suffix_List/Use_Cases
87 func PublicSuffix(domain string) (publicSuffix string, icann bool) {
88 if _, err := netip.ParseAddr(domain); err == nil {
89 return domain, false
90 }
91 92 lo, hi := uint32(0), uint32(numTLD)
93 s, suffix, icannNode, wildcard := domain, len(domain), false, false
94 loop:
95 for {
96 dot := strings.LastIndexByte(s, '.')
97 if wildcard {
98 icann = icannNode
99 suffix = 1 + dot
100 }
101 if lo == hi {
102 break
103 }
104 f := find(s[1+dot:], lo, hi)
105 if f == notFound {
106 break
107 }
108 109 u := uint32(nodes.get(f) >> (nodesBitsTextOffset + nodesBitsTextLength))
110 icannNode = u&(1<<nodesBitsICANN-1) != 0
111 u >>= nodesBitsICANN
112 u = children.get(u & (1<<nodesBitsChildren - 1))
113 lo = u & (1<<childrenBitsLo - 1)
114 u >>= childrenBitsLo
115 hi = u & (1<<childrenBitsHi - 1)
116 u >>= childrenBitsHi
117 switch u & (1<<childrenBitsNodeType - 1) {
118 case nodeTypeNormal:
119 suffix = 1 + dot
120 case nodeTypeException:
121 suffix = 1 + len(s)
122 break loop
123 }
124 u >>= childrenBitsNodeType
125 wildcard = u&(1<<childrenBitsWildcard-1) != 0
126 if !wildcard {
127 icann = icannNode
128 }
129 130 if dot == -1 {
131 break
132 }
133 s = s[:dot]
134 }
135 if suffix == len(domain) {
136 // If no rules match, the prevailing rule is "*".
137 return domain[1+strings.LastIndexByte(domain, '.'):], icann
138 }
139 return domain[suffix:], icann
140 }
// notFound is the value find returns when no node in the searched range has
// the requested label. It is the largest uint32 value.
const notFound = ^uint32(0)
143 144 // find returns the index of the node in the range [lo, hi) whose label equals
145 // label, or notFound if there is no such node. The range is assumed to be in
146 // strictly increasing node label order.
147 func find(label string, lo, hi uint32) uint32 {
148 for lo < hi {
149 mid := lo + (hi-lo)/2
150 s := nodeLabel(mid)
151 if s < label {
152 lo = mid + 1
153 } else if s == label {
154 return mid
155 } else {
156 hi = mid
157 }
158 }
159 return notFound
160 }
161 162 // nodeLabel returns the label for the i'th node.
163 func nodeLabel(i uint32) string {
164 x := nodes.get(i)
165 length := x & (1<<nodesBitsTextLength - 1)
166 x >>= nodesBitsTextLength
167 offset := x & (1<<nodesBitsTextOffset - 1)
168 return text[offset : offset+length]
169 }
170 171 // EffectiveTLDPlusOne returns the effective top level domain plus one more
172 // label. For example, the eTLD+1 for "foo.bar.golang.org" is "golang.org".
173 func EffectiveTLDPlusOne(domain string) (string, error) {
174 if strings.HasPrefix(domain, ".") || strings.HasSuffix(domain, ".") || strings.Contains(domain, "..") {
175 return "", fmt.Errorf("publicsuffix: empty label in domain %q", domain)
176 }
177 178 suffix, _ := PublicSuffix(domain)
179 if len(domain) <= len(suffix) {
180 return "", fmt.Errorf("publicsuffix: cannot derive eTLD+1 for domain %q", domain)
181 }
182 i := len(domain) - len(suffix) - 1
183 if domain[i] != '.' {
184 return "", fmt.Errorf("publicsuffix: invalid public suffix %q for domain %q", suffix, domain)
185 }
186 return domain[1+strings.LastIndexByte(domain[:i], '.'):], nil
187 }
// uint32String is a string holding a packed sequence of big-endian uint32
// values, four bytes per entry.
type uint32String string

// get returns the i'th uint32 stored in u.
//
// The byte offset is computed in 64 bits. In uint32 arithmetic a very large
// i (for example 1<<30) would wrap around to a small offset and silently
// return an unrelated entry; with the wider type an out-of-range i always
// panics, like any other out-of-range slice or index operation.
func (u uint32String) get(i uint32) uint32 {
	off := uint64(i) * 4
	u = u[off:] // help the compiler reduce bounds checks
	return uint32(u[3]) |
		uint32(u[2])<<8 |
		uint32(u[1])<<16 |
		uint32(u[0])<<24
}
199 200 type uint40String string
201 202 func (u uint40String) get(i uint32) uint64 {
203 off := uint64(i * (nodesBits / 8))
204 u = u[off:] // help the compiler reduce bounds checks
205 return uint64(u[4]) |
206 uint64(u[3])<<8 |
207 uint64(u[2])<<16 |
208 uint64(u[1])<<24 |
209 uint64(u[0])<<32
210 }
211