charset.go raw
1 // Package charset provides functions to decode and encode charsets.
2 //
// It imports all supported charsets, which adds about 1MiB to the binary size.
4 // Importing the package automatically sets message.CharsetReader.
5 package charset
6
7 import (
8 "fmt"
9 "io"
10 "strings"
11
12 "github.com/emersion/go-message"
13 "golang.org/x/text/encoding"
14 "golang.org/x/text/encoding/charmap"
15 "golang.org/x/text/encoding/htmlindex"
16 "golang.org/x/text/encoding/ianaindex"
17 "golang.org/x/text/encoding/unicode"
18 )
19
// charsets is the quirks table for charsets not handled by ianaindex.
//
// Reader consults this table first, using the lowercased charset name as the
// key, so keys must be lowercase to be matched.
//
// A nil entry disables the charset.
//
// For aliases, see
// https://www.iana.org/assignments/character-sets/character-sets.xhtml
var charsets = map[string]encoding.Encoding{
	"ansi_x3.110-1983": charmap.ISO8859_1, // see RFC 1345 page 62, mostly superset of ISO 8859-1
	"x-utf_8j":         unicode.UTF8,      // alias for UTF-8, see https://icu4c-demos.unicode.org/icu-bin/convexp?s=ALL
}
30
// init sets message.CharsetReader to Reader when this package is imported.
func init() {
	message.CharsetReader = Reader
}
34
35 // Reader returns an io.Reader that converts the provided charset to UTF-8.
36 func Reader(charset string, input io.Reader) (io.Reader, error) {
37 var err error
38 enc, ok := charsets[strings.ToLower(charset)]
39 if ok && enc == nil {
40 return nil, fmt.Errorf("charset %q: charset is disabled", charset)
41 } else if !ok {
42 enc, err = ianaindex.MIME.Encoding(charset)
43 }
44 if enc == nil {
45 enc, err = ianaindex.MIME.Encoding("cs" + charset)
46 }
47 if enc == nil {
48 enc, err = htmlindex.Get(charset)
49 }
50 if err != nil {
51 return nil, fmt.Errorf("charset %q: %v", charset, err)
52 }
53 // See https://github.com/golang/go/issues/19421
54 if enc == nil {
55 return nil, fmt.Errorf("charset %q: unsupported charset", charset)
56 }
57 return enc.NewDecoder().Reader(input), nil
58 }
59
60 // RegisterEncoding registers an encoding. This is intended to be called from
61 // the init function in packages that want to support additional charsets.
62 func RegisterEncoding(name string, enc encoding.Encoding) {
63 charsets[name] = enc
64 }
65