1 // Copyright 2015 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 //go:generate go run gen.go
6 7 // Package ianaindex maps names to Encodings as specified by the IANA registry.
8 // This includes both the MIME and IANA names.
9 //
10 // See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
11 // more details.
12 package ianaindex
13 14 import (
15 "errors"
16 "sort"
17 "strings"
18 19 "golang.org/x/text/encoding"
20 "golang.org/x/text/encoding/charmap"
21 "golang.org/x/text/encoding/internal/identifier"
22 "golang.org/x/text/encoding/japanese"
23 "golang.org/x/text/encoding/korean"
24 "golang.org/x/text/encoding/simplifiedchinese"
25 "golang.org/x/text/encoding/traditionalchinese"
26 "golang.org/x/text/encoding/unicode"
27 )
28 29 // TODO: remove the "Status... incomplete" in the package doc comment.
30 // TODO: allow users to specify their own aliases?
31 // TODO: allow users to specify their own indexes?
32 // TODO: allow canonicalizing names
33 34 // NOTE: only use these top-level variables if we can get the linker to drop
35 // the indexes when they are not used. Make them a function or perhaps only
36 // support MIME otherwise.
37 38 var (
39 // MIME is an index to map MIME names.
40 MIME *Index = mime
41 42 // IANA is an index that supports all names and aliases using IANA names as
43 // the canonical identifier.
44 IANA *Index = iana
45 46 // MIB is an index that associates the MIB display name with an Encoding.
47 MIB *Index = mib
48 49 mime = &Index{mimeName, ianaToMIB, ianaAliases, encodings[:]}
50 iana = &Index{ianaName, ianaToMIB, ianaAliases, encodings[:]}
51 mib = &Index{mibName, ianaToMIB, ianaAliases, encodings[:]}
52 )
53 54 // Index maps names registered by IANA to Encodings.
55 // Currently different Indexes only differ in the names they return for
56 // encodings. In the future they may also differ in supported aliases.
57 type Index struct {
58 names func(i int) string
59 toMIB []identifier.MIB // Sorted slice of supported MIBs
60 alias map[string]int
61 enc []encoding.Encoding
62 }
// errInvalidName is returned by Encoding when the given name is not a known
// alias.
var errInvalidName = errors.New("ianaindex: invalid encoding name")

// errUnknown is returned by Name when the Encoding does not identify itself
// or reports a zero MIB.
var errUnknown = errors.New("ianaindex: unknown Encoding")

// errUnsupported is returned by Name when the Encoding's MIB is not part of
// the index.
var errUnsupported = errors.New("ianaindex: unsupported Encoding")
69 70 // Encoding returns an Encoding for IANA-registered names. Matching is
71 // case-insensitive.
72 //
73 // If the provided name doesn't match a IANA-registered charset, an error is
74 // returned. If the name matches a IANA-registered charset but isn't supported,
75 // a nil encoding and a nil error are returned.
76 func (x *Index) Encoding(name string) (encoding.Encoding, error) {
77 name = strings.TrimSpace(name)
78 // First try without lowercasing (possibly creating an allocation).
79 i, ok := x.alias[name]
80 if !ok {
81 i, ok = x.alias[strings.ToLower(name)]
82 if !ok {
83 return nil, errInvalidName
84 }
85 }
86 return x.enc[i], nil
87 }
88 89 // Name reports the canonical name of the given Encoding. It will return an
90 // error if the e is not associated with a known encoding scheme.
91 func (x *Index) Name(e encoding.Encoding) (string, error) {
92 id, ok := e.(identifier.Interface)
93 if !ok {
94 return "", errUnknown
95 }
96 mib, _ := id.ID()
97 if mib == 0 {
98 return "", errUnknown
99 }
100 v := findMIB(x.toMIB, mib)
101 if v == -1 {
102 return "", errUnsupported
103 }
104 return x.names(v), nil
105 }
106 107 // TODO: the coverage of this index is rather spotty. Allowing users to set
108 // encodings would allow:
109 // - users to increase coverage
// - allow a partially loaded set of encodings in case the user doesn't need
//   them all.
112 // - write an OS-specific wrapper for supported encodings and set them.
113 // The exact definition of Set depends a bit on if and how we want to let users
114 // write their own Encoding implementations. Also, it is not possible yet to
115 // only partially load the encodings without doing some refactoring. Until this
116 // is solved, we might as well not support Set.
117 // // Set sets the e to be used for the encoding scheme identified by name. Only
118 // // canonical names may be used. An empty name assigns e to its internally
119 // // associated encoding scheme.
120 // func (x *Index) Set(name string, e encoding.Encoding) error {
121 // panic("TODO: implement")
122 // }
123 124 func findMIB(x []identifier.MIB, mib identifier.MIB) int {
125 i := sort.Search(len(x), func(i int) bool { return x[i] >= mib })
126 if i < len(x) && x[i] == mib {
127 return i
128 }
129 return -1
130 }
// maxMIMENameLen is the largest value the first byte of an ianaNames entry
// may have to be treated as a length prefix rather than as the first
// character of a name. It is one less than '0'. Officially the limit is 40,
// but we leave some buffer.
const maxMIMENameLen = '0' - 1
133 134 func mimeName(x int) string {
135 n := ianaNames[x]
136 // See gen.go for a description of the encoding.
137 if n[0] <= maxMIMENameLen {
138 return n[1:n[0]]
139 }
140 return n
141 }
142 143 func ianaName(x int) string {
144 n := ianaNames[x]
145 // See gen.go for a description of the encoding.
146 if n[0] <= maxMIMENameLen {
147 return n[n[0]:]
148 }
149 return n
150 }
151 152 func mibName(x int) string {
153 return mibNames[x]
154 }
155 156 var encodings = [numIANA]encoding.Encoding{
157 enc3: asciiEnc,
158 enc106: unicode.UTF8,
159 enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
160 enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
161 enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
162 enc2028: charmap.CodePage037,
163 enc2011: charmap.CodePage437,
164 enc2009: charmap.CodePage850,
165 enc2010: charmap.CodePage852,
166 enc2046: charmap.CodePage855,
167 enc2089: charmap.CodePage858,
168 enc2048: charmap.CodePage860,
169 enc2013: charmap.CodePage862,
170 enc2050: charmap.CodePage863,
171 enc2052: charmap.CodePage865,
172 enc2086: charmap.CodePage866,
173 enc2102: charmap.CodePage1047,
174 enc2091: charmap.CodePage1140,
175 enc4: charmap.ISO8859_1,
176 enc5: charmap.ISO8859_2,
177 enc6: charmap.ISO8859_3,
178 enc7: charmap.ISO8859_4,
179 enc8: charmap.ISO8859_5,
180 enc9: charmap.ISO8859_6,
181 enc81: charmap.ISO8859_6E,
182 enc82: charmap.ISO8859_6I,
183 enc10: charmap.ISO8859_7,
184 enc11: charmap.ISO8859_8,
185 enc84: charmap.ISO8859_8E,
186 enc85: charmap.ISO8859_8I,
187 enc12: charmap.ISO8859_9,
188 enc13: charmap.ISO8859_10,
189 enc109: charmap.ISO8859_13,
190 enc110: charmap.ISO8859_14,
191 enc111: charmap.ISO8859_15,
192 enc112: charmap.ISO8859_16,
193 enc2084: charmap.KOI8R,
194 enc2088: charmap.KOI8U,
195 enc2027: charmap.Macintosh,
196 enc2109: charmap.Windows874,
197 enc2250: charmap.Windows1250,
198 enc2251: charmap.Windows1251,
199 enc2252: charmap.Windows1252,
200 enc2253: charmap.Windows1253,
201 enc2254: charmap.Windows1254,
202 enc2255: charmap.Windows1255,
203 enc2256: charmap.Windows1256,
204 enc2257: charmap.Windows1257,
205 enc2258: charmap.Windows1258,
206 enc18: japanese.EUCJP,
207 enc39: japanese.ISO2022JP,
208 enc17: japanese.ShiftJIS,
209 enc38: korean.EUCKR,
210 enc114: simplifiedchinese.GB18030,
211 enc113: simplifiedchinese.GBK,
212 enc2085: simplifiedchinese.HZGB2312,
213 enc2026: traditionalchinese.Big5,
214 }
215