1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:generate go run gen.go gen_index.go -output tables.go
6 //go:generate go run gen_parents.go
7 8 package compact
9 10 // TODO: Remove above NOTE after:
11 // - verifying that tables are dropped correctly (most notably matcher tables).
12 13 import (
14 "strings"
15 16 "golang.org/x/text/internal/language"
17 )
// Tag represents a BCP 47 language tag. It is used to specify an instance of a
// specific language or locale. All language tag values are guaranteed to be
// well-formed.
type Tag struct {
	// NOTE: exported tags will become part of the public API.
	// language is the ID Make derives from the tag after removing any
	// recognized "rg" override.
	language ID
	// locale is the ID Make derives after substituting the region named by a
	// recognized "rg" override; without such an override it equals language.
	locale ID
	// full retains the original tag when the IDs above do not represent it
	// exactly; it is nil for tags that are fully described by the IDs.
	full fullTag // always a language.Tag for now.
}
// _und is the ID value that IsRoot compares a compact tag's language against,
// i.e. the ID treated as the root language "und".
const _und = 0
// fullTag lists the methods Tag calls on the value stored in its full field.
// Tag.Tag additionally type-asserts that value to language.Tag.
type fullTag interface {
	// IsRoot is what Tag.IsRoot delegates to when a full tag is present.
	IsRoot() bool
	// Parent is what Tag.Parent delegates to when a full tag is present.
	Parent() language.Tag
}
35 36 // Make a compact Tag from a fully specified internal language Tag.
37 func Make(t language.Tag) (tag Tag) {
38 if region := t.TypeForKey("rg"); len(region) == 6 && region[2:] == "zzzz" {
39 if r, err := language.ParseRegion(region[:2]); err == nil {
40 tFull := t
41 t, _ = t.SetTypeForKey("rg", "")
42 // TODO: should we not consider "va" for the language tag?
43 var exact1, exact2 bool
44 tag.language, exact1 = FromTag(t)
45 t.RegionID = r
46 tag.locale, exact2 = FromTag(t)
47 if !exact1 || !exact2 {
48 tag.full = tFull
49 }
50 return tag
51 }
52 }
53 lang, ok := FromTag(t)
54 tag.language = lang
55 tag.locale = lang
56 if !ok {
57 tag.full = t
58 }
59 return tag
60 }
61 62 // Tag returns an internal language Tag version of this tag.
63 func (t Tag) Tag() language.Tag {
64 if t.full != nil {
65 return t.full.(language.Tag)
66 }
67 tag := t.language.Tag()
68 if t.language != t.locale {
69 loc := t.locale.Tag()
70 tag, _ = tag.SetTypeForKey("rg", strings.ToLower(loc.RegionID.String())+"zzzz")
71 }
72 return tag
73 }
74 75 // IsCompact reports whether this tag is fully defined in terms of ID.
76 func (t *Tag) IsCompact() bool {
77 return t.full == nil
78 }
79 80 // MayHaveVariants reports whether a tag may have variants. If it returns false
81 // it is guaranteed the tag does not have variants.
82 func (t Tag) MayHaveVariants() bool {
83 return t.full != nil || int(t.language) >= len(coreTags)
84 }
85 86 // MayHaveExtensions reports whether a tag may have extensions. If it returns
87 // false it is guaranteed the tag does not have them.
88 func (t Tag) MayHaveExtensions() bool {
89 return t.full != nil ||
90 int(t.language) >= len(coreTags) ||
91 t.language != t.locale
92 }
93 94 // IsRoot returns true if t is equal to language "und".
95 func (t Tag) IsRoot() bool {
96 if t.full != nil {
97 return t.full.IsRoot()
98 }
99 return t.language == _und
100 }
101 102 // Parent returns the CLDR parent of t. In CLDR, missing fields in data for a
103 // specific language are substituted with fields from the parent language.
104 // The parent for a language may change for newer versions of CLDR.
105 func (t Tag) Parent() Tag {
106 if t.full != nil {
107 return Make(t.full.Parent())
108 }
109 if t.language != t.locale {
110 // Simulate stripping -u-rg-xxxxxx
111 return Tag{language: t.language, locale: t.language}
112 }
113 // TODO: use parent lookup table once cycle from internal package is
114 // removed. Probably by internalizing the table and declaring this fast
115 // enough.
116 // lang := compactID(internal.Parent(uint16(t.language)))
117 lang, _ := FromTag(t.language.Tag().Parent())
118 return Tag{language: lang, locale: lang}
119 }
// nextToken returns token t and the rest of the string.
//
// The first byte of s is skipped as the separator that introduces the token.
// The token runs up to, but not including, the next "-", and tail starts at
// that "-". If there is no further "-", tail is empty. An empty s yields an
// empty token and an empty tail instead of panicking.
func nextToken(s string) (t, tail string) {
	if s == "" {
		return "", ""
	}
	rest := s[1:]
	i := strings.IndexByte(rest, '-')
	if i < 0 {
		return rest, ""
	}
	return rest[:i], rest[i:]
}
130 131 // LanguageID returns an index, where 0 <= index < NumCompactTags, for tags
132 // for which data exists in the text repository.The index will change over time
133 // and should not be stored in persistent storage. If t does not match a compact
134 // index, exact will be false and the compact index will be returned for the
135 // first match after repeatedly taking the Parent of t.
136 func LanguageID(t Tag) (id ID, exact bool) {
137 return t.language, t.full == nil
138 }
139 140 // RegionalID returns the ID for the regional variant of this tag. This index is
141 // used to indicate region-specific overrides, such as default currency, default
142 // calendar and week data, default time cycle, and default measurement system
143 // and unit preferences.
144 //
145 // For instance, the tag en-GB-u-rg-uszzzz specifies British English with US
146 // settings for currency, number formatting, etc. The CompactIndex for this tag
147 // will be that for en-GB, while the RegionalID will be the one corresponding to
148 // en-US.
149 func RegionalID(t Tag) (id ID, exact bool) {
150 return t.locale, t.full == nil
151 }
152 153 // LanguageTag returns t stripped of regional variant indicators.
154 //
155 // At the moment this means it is stripped of a regional and variant subtag "rg"
156 // and "va" in the "u" extension.
157 func (t Tag) LanguageTag() Tag {
158 if t.full == nil {
159 return Tag{language: t.language, locale: t.language}
160 }
161 tt := t.Tag()
162 tt.SetTypeForKey("rg", "")
163 tt.SetTypeForKey("va", "")
164 return Make(tt)
165 }
166 167 // RegionalTag returns the regional variant of the tag.
168 //
169 // At the moment this means that the region is set from the regional subtag
170 // "rg" in the "u" extension.
171 func (t Tag) RegionalTag() Tag {
172 rt := Tag{language: t.locale, locale: t.locale}
173 if t.full == nil {
174 return rt
175 }
176 b := language.Builder{}
177 tag := t.Tag()
178 // tag, _ = tag.SetTypeForKey("rg", "")
179 b.SetTag(t.locale.Tag())
180 if v := tag.Variants(); v != "" {
181 for _, v := range strings.Split(v, "-") {
182 b.AddVariant(v)
183 }
184 }
185 for _, e := range tag.Extensions() {
186 b.AddExt(e)
187 }
188 return t
189 }
190 191 // FromTag reports closest matching ID for an internal language Tag.
192 func FromTag(t language.Tag) (id ID, exact bool) {
193 // TODO: perhaps give more frequent tags a lower index.
194 // TODO: we could make the indexes stable. This will excluded some
195 // possibilities for optimization, so don't do this quite yet.
196 exact = true
197 198 b, s, r := t.Raw()
199 if t.HasString() {
200 if t.IsPrivateUse() {
201 // We have no entries for user-defined tags.
202 return 0, false
203 }
204 hasExtra := false
205 if t.HasVariants() {
206 if t.HasExtensions() {
207 build := language.Builder{}
208 build.SetTag(language.Tag{LangID: b, ScriptID: s, RegionID: r})
209 build.AddVariant(t.Variants())
210 exact = false
211 t = build.Make()
212 }
213 hasExtra = true
214 } else if _, ok := t.Extension('u'); ok {
215 // TODO: va may mean something else. Consider not considering it.
216 // Strip all but the 'va' entry.
217 old := t
218 variant := t.TypeForKey("va")
219 t = language.Tag{LangID: b, ScriptID: s, RegionID: r}
220 if variant != "" {
221 t, _ = t.SetTypeForKey("va", variant)
222 hasExtra = true
223 }
224 exact = old == t
225 } else {
226 exact = false
227 }
228 if hasExtra {
229 // We have some variants.
230 for i, s := range specialTags {
231 if s == t {
232 return ID(i + len(coreTags)), exact
233 }
234 }
235 exact = false
236 }
237 }
238 if x, ok := getCoreIndex(t); ok {
239 return x, exact
240 }
241 exact = false
242 if r != 0 && s == 0 {
243 // Deal with cases where an extra script is inserted for the region.
244 t, _ := t.Maximize()
245 if x, ok := getCoreIndex(t); ok {
246 return x, exact
247 }
248 }
249 for t = t.Parent(); t != root; t = t.Parent() {
250 // No variants specified: just compare core components.
251 // The key has the form lllssrrr, where l, s, and r are nibbles for
252 // respectively the langID, scriptID, and regionID.
253 if x, ok := getCoreIndex(t); ok {
254 return x, exact
255 }
256 }
257 return 0, exact
258 }
// root is the zero language.Tag. FromTag stops walking up the parent chain
// once it reaches this value.
var root = language.Tag{}
261