charset.go raw

   1  // Package charset provides functions to decode and encode charsets.
   2  //
   3  // It imports all supported charsets, which adds about 1MiB to binaries size.
   4  // Importing the package automatically sets message.CharsetReader.
   5  package charset
   6  
   7  import (
   8  	"fmt"
   9  	"io"
  10  	"strings"
  11  
  12  	"github.com/emersion/go-message"
  13  	"golang.org/x/text/encoding"
  14  	"golang.org/x/text/encoding/charmap"
  15  	"golang.org/x/text/encoding/htmlindex"
  16  	"golang.org/x/text/encoding/ianaindex"
  17  	"golang.org/x/text/encoding/unicode"
  18  )
  19  
  20  // Quirks table for charsets not handled by ianaindex
  21  //
  22  // A nil entry disables the charset.
  23  //
  24  // For aliases, see
  25  // https://www.iana.org/assignments/character-sets/character-sets.xhtml
  26  var charsets = map[string]encoding.Encoding{
  27  	"ansi_x3.110-1983": charmap.ISO8859_1, // see RFC 1345 page 62, mostly superset of ISO 8859-1
  28  	"x-utf_8j":         unicode.UTF8,      // alias for UTF-8, see https://icu4c-demos.unicode.org/icu-bin/convexp?s=ALL
  29  }
  30  
  31  func init() {
  32  	message.CharsetReader = Reader
  33  }
  34  
  35  // Reader returns an io.Reader that converts the provided charset to UTF-8.
  36  func Reader(charset string, input io.Reader) (io.Reader, error) {
  37  	var err error
  38  	enc, ok := charsets[strings.ToLower(charset)]
  39  	if ok && enc == nil {
  40  		return nil, fmt.Errorf("charset %q: charset is disabled", charset)
  41  	} else if !ok {
  42  		enc, err = ianaindex.MIME.Encoding(charset)
  43  	}
  44  	if enc == nil {
  45  		enc, err = ianaindex.MIME.Encoding("cs" + charset)
  46  	}
  47  	if enc == nil {
  48  		enc, err = htmlindex.Get(charset)
  49  	}
  50  	if err != nil {
  51  		return nil, fmt.Errorf("charset %q: %v", charset, err)
  52  	}
  53  	// See https://github.com/golang/go/issues/19421
  54  	if enc == nil {
  55  		return nil, fmt.Errorf("charset %q: unsupported charset", charset)
  56  	}
  57  	return enc.NewDecoder().Reader(input), nil
  58  }
  59  
  60  // RegisterEncoding registers an encoding. This is intended to be called from
  61  // the init function in packages that want to support additional charsets.
  62  func RegisterEncoding(name string, enc encoding.Encoding) {
  63  	charsets[name] = enc
  64  }
  65