1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package language
6 7 import (
8 "errors"
9 "sort"
10 "strconv"
11 "strings"
12 13 "golang.org/x/text/internal/language"
14 )
// ValueError is the error reported by the parsing functions for input that is
// well-formed but contains a subtag whose value is not recognized as valid.
type ValueError interface {
	error

	// Subtag reports the subtag that caused the error.
	Subtag() string
}
25 26 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
27 // failed it returns an error and any part of the tag that could be parsed.
28 // If parsing succeeded but an unknown value was found, it returns
29 // ValueError. The Tag returned in this case is just stripped of the unknown
30 // value. All other values are preserved. It accepts tags in the BCP 47 format
31 // and extensions to this standard defined in
32 // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
33 // The resulting tag is canonicalized using the default canonicalization type.
34 func Parse(s string) (t Tag, err error) {
35 return Default.Parse(s)
36 }
37 38 // Parse parses the given BCP 47 string and returns a valid Tag. If parsing
39 // failed it returns an error and any part of the tag that could be parsed.
40 // If parsing succeeded but an unknown value was found, it returns
41 // ValueError. The Tag returned in this case is just stripped of the unknown
42 // value. All other values are preserved. It accepts tags in the BCP 47 format
43 // and extensions to this standard defined in
44 // https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
45 // The resulting tag is canonicalized using the canonicalization type c.
46 func (c CanonType) Parse(s string) (t Tag, err error) {
47 defer func() {
48 if recover() != nil {
49 t = Tag{}
50 err = language.ErrSyntax
51 }
52 }()
53 54 tt, err := language.Parse(s)
55 if err != nil {
56 return makeTag(tt), err
57 }
58 tt, changed := canonicalize(c, tt)
59 if changed {
60 tt.RemakeString()
61 }
62 return makeTag(tt), nil
63 }
64 65 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
66 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
67 // Base, Script or Region or slice of type Variant or Extension is passed more
68 // than once, the latter will overwrite the former. Variants and Extensions are
69 // accumulated, but if two extensions of the same type are passed, the latter
70 // will replace the former. For -u extensions, though, the key-type pairs are
71 // added, where later values overwrite older ones. A Tag overwrites all former
72 // values and typically only makes sense as the first argument. The resulting
73 // tag is returned after canonicalizing using the Default CanonType. If one or
74 // more errors are encountered, one of the errors is returned.
75 func Compose(part ...interface{}) (t Tag, err error) {
76 return Default.Compose(part...)
77 }
78 79 // Compose creates a Tag from individual parts, which may be of type Tag, Base,
80 // Script, Region, Variant, []Variant, Extension, []Extension or error. If a
81 // Base, Script or Region or slice of type Variant or Extension is passed more
82 // than once, the latter will overwrite the former. Variants and Extensions are
83 // accumulated, but if two extensions of the same type are passed, the latter
84 // will replace the former. For -u extensions, though, the key-type pairs are
85 // added, where later values overwrite older ones. A Tag overwrites all former
86 // values and typically only makes sense as the first argument. The resulting
87 // tag is returned after canonicalizing using CanonType c. If one or more errors
88 // are encountered, one of the errors is returned.
89 func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
90 defer func() {
91 if recover() != nil {
92 t = Tag{}
93 err = language.ErrSyntax
94 }
95 }()
96 97 var b language.Builder
98 if err = update(&b, part...); err != nil {
99 return und, err
100 }
101 b.Tag, _ = canonicalize(c, b.Tag)
102 return makeTag(b.Make()), err
103 }
104 105 var errInvalidArgument = errors.New("invalid Extension or Variant")
106 107 func update(b *language.Builder, part ...interface{}) (err error) {
108 for _, x := range part {
109 switch v := x.(type) {
110 case Tag:
111 b.SetTag(v.tag())
112 case Base:
113 b.Tag.LangID = v.langID
114 case Script:
115 b.Tag.ScriptID = v.scriptID
116 case Region:
117 b.Tag.RegionID = v.regionID
118 case Variant:
119 if v.variant == "" {
120 err = errInvalidArgument
121 break
122 }
123 b.AddVariant(v.variant)
124 case Extension:
125 if v.s == "" {
126 err = errInvalidArgument
127 break
128 }
129 b.SetExt(v.s)
130 case []Variant:
131 b.ClearVariants()
132 for _, v := range v {
133 b.AddVariant(v.variant)
134 }
135 case []Extension:
136 b.ClearExtensions()
137 for _, e := range v {
138 b.SetExt(e.s)
139 }
140 // TODO: support parsing of raw strings based on morphology or just extensions?
141 case error:
142 if v != nil {
143 err = v
144 }
145 }
146 }
147 return
148 }
149 150 var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
151 var errTagListTooLarge = errors.New("tag list exceeds max length")
152 153 // ParseAcceptLanguage parses the contents of an Accept-Language header as
154 // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
155 // a list of corresponding quality weights. It is more permissive than RFC 2616
156 // and may return non-nil slices even if the input is not valid.
157 // The Tags will be sorted by highest weight first and then by first occurrence.
158 // Tags with a weight of zero will be dropped. An error will be returned if the
159 // input could not be parsed.
160 func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
161 defer func() {
162 if recover() != nil {
163 tag = nil
164 q = nil
165 err = language.ErrSyntax
166 }
167 }()
168 169 if strings.Count(s, "-") > 1000 {
170 return nil, nil, errTagListTooLarge
171 }
172 173 var entry string
174 for s != "" {
175 if entry, s = split(s, ','); entry == "" {
176 continue
177 }
178 179 entry, weight := split(entry, ';')
180 181 // Scan the language.
182 t, err := Parse(entry)
183 if err != nil {
184 id, ok := acceptFallback[entry]
185 if !ok {
186 return nil, nil, err
187 }
188 t = makeTag(language.Tag{LangID: id})
189 }
190 191 // Scan the optional weight.
192 w := 1.0
193 if weight != "" {
194 weight = consume(weight, 'q')
195 weight = consume(weight, '=')
196 // consume returns the empty string when a token could not be
197 // consumed, resulting in an error for ParseFloat.
198 if w, err = strconv.ParseFloat(weight, 32); err != nil {
199 return nil, nil, errInvalidWeight
200 }
201 // Drop tags with a quality weight of 0.
202 if w <= 0 {
203 continue
204 }
205 }
206 207 tag = append(tag, t)
208 q = append(q, float32(w))
209 }
210 sort.Stable(&tagSort{tag, q})
211 return tag, q, nil
212 }
// consume strips the leading token c from s and returns the remainder with
// surrounding white space trimmed. It returns the empty string when s does not
// start with c.
func consume(s string, c byte) string {
	if len(s) > 0 && s[0] == c {
		return strings.TrimSpace(s[1:])
	}
	return ""
}
// split cuts s at the first occurrence of c and returns the text before and
// after it, each with surrounding white space trimmed. When c does not occur
// in s, head is the trimmed s and tail is empty.
func split(s string, c byte) (head, tail string) {
	i := strings.IndexByte(s, c)
	if i < 0 {
		return strings.TrimSpace(s), ""
	}
	head = strings.TrimSpace(s[:i])
	tail = strings.TrimSpace(s[i+1:])
	return head, tail
}
229 230 // Add hack mapping to deal with a small number of cases that occur
231 // in Accept-Language (with reasonable frequency).
232 var acceptFallback = map[string]language.Language{
233 "english": _en,
234 "deutsch": _de,
235 "italian": _it,
236 "french": _fr,
237 "*": _mul, // defined in the spec to match all languages.
238 }
239 240 type tagSort struct {
241 tag []Tag
242 q []float32
243 }
244 245 func (s *tagSort) Len() int {
246 return len(s.q)
247 }
248 249 func (s *tagSort) Less(i, j int) bool {
250 return s.q[i] > s.q[j]
251 }
252 253 func (s *tagSort) Swap(i, j int) {
254 s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
255 s.q[i], s.q[j] = s.q[j], s.q[i]
256 }
257