entity.go raw

   1  package message
   2  
   3  import (
   4  	"bufio"
   5  	"errors"
   6  	"io"
   7  	"math"
   8  	"strings"
   9  
  10  	"github.com/emersion/go-message/textproto"
  11  )
  12  
// An Entity is either a whole message or one of the parts in the body of a
// multipart entity.
type Entity struct {
	Header Header    // The entity's header.
	Body   io.Reader // The decoded entity's body.

	// mediaType and mediaParams cache the result of Header.ContentType() as
	// computed by New; they drive the multipart detection in MultipartReader.
	mediaType   string
	mediaParams map[string]string
}
  22  
  23  // New makes a new message with the provided header and body. The entity's
  24  // transfer encoding and charset are automatically decoded to UTF-8.
  25  //
  26  // If the message uses an unknown transfer encoding or charset, New returns an
  27  // error that verifies IsUnknownCharset, but also returns an Entity that can
  28  // be read.
  29  func New(header Header, body io.Reader) (*Entity, error) {
  30  	var err error
  31  
  32  	mediaType, mediaParams, _ := header.ContentType()
  33  
  34  	// QUIRK: RFC 2045 section 6.4 specifies that multipart messages can't have
  35  	// a Content-Transfer-Encoding other than "7bit", "8bit" or "binary".
  36  	// However some messages in the wild are non-conformant and have it set to
  37  	// e.g. "quoted-printable". So we just ignore it for multipart.
  38  	// See https://github.com/emersion/go-message/issues/48
  39  	if !strings.HasPrefix(mediaType, "multipart/") {
  40  		enc := header.Get("Content-Transfer-Encoding")
  41  		if decoded, encErr := encodingReader(enc, body); encErr != nil {
  42  			err = UnknownEncodingError{encErr}
  43  		} else {
  44  			body = decoded
  45  		}
  46  	}
  47  
  48  	// RFC 2046 section 4.1.2: charset only applies to text/*
  49  	if strings.HasPrefix(mediaType, "text/") {
  50  		if ch, ok := mediaParams["charset"]; ok {
  51  			if converted, charsetErr := charsetReader(ch, body); charsetErr != nil {
  52  				err = UnknownCharsetError{charsetErr}
  53  			} else {
  54  				body = converted
  55  			}
  56  		}
  57  	}
  58  
  59  	return &Entity{
  60  		Header:      header,
  61  		Body:        body,
  62  		mediaType:   mediaType,
  63  		mediaParams: mediaParams,
  64  	}, err
  65  }
  66  
  67  // NewMultipart makes a new multipart message with the provided header and
  68  // parts. The Content-Type header must begin with "multipart/".
  69  //
  70  // If the message uses an unknown transfer encoding, NewMultipart returns an
  71  // error that verifies IsUnknownCharset, but also returns an Entity that can
  72  // be read.
  73  func NewMultipart(header Header, parts []*Entity) (*Entity, error) {
  74  	r := &multipartBody{
  75  		header: header,
  76  		parts:  parts,
  77  	}
  78  
  79  	return New(header, r)
  80  }
  81  
  82  const defaultMaxHeaderBytes = 1 << 20 // 1 MB
  83  
  84  var errHeaderTooBig = errors.New("message: header exceeds maximum size")
  85  
  86  // limitedReader is the same as io.LimitedReader, but returns a custom error.
  87  type limitedReader struct {
  88  	R io.Reader
  89  	N int64
  90  }
  91  
  92  func (lr *limitedReader) Read(p []byte) (int, error) {
  93  	if lr.N <= 0 {
  94  		return 0, errHeaderTooBig
  95  	}
  96  	if int64(len(p)) > lr.N {
  97  		p = p[0:lr.N]
  98  	}
  99  	n, err := lr.R.Read(p)
 100  	lr.N -= int64(n)
 101  	return n, err
 102  }
 103  
 104  // ReadOptions are options for ReadWithOptions.
 105  type ReadOptions struct {
 106  	// MaxHeaderBytes limits the maximum permissible size of a message header
 107  	// block. If exceeded, an error will be returned.
 108  	//
 109  	// Set to -1 for no limit, set to 0 for the default value (1MB).
 110  	MaxHeaderBytes int64
 111  }
 112  
 113  // withDefaults returns a sanitised version of the options with defaults/special
 114  // values accounted for.
 115  func (o *ReadOptions) withDefaults() *ReadOptions {
 116  	var out ReadOptions
 117  	if o != nil {
 118  		out = *o
 119  	}
 120  	if out.MaxHeaderBytes == 0 {
 121  		out.MaxHeaderBytes = defaultMaxHeaderBytes
 122  	} else if out.MaxHeaderBytes < 0 {
 123  		out.MaxHeaderBytes = math.MaxInt64
 124  	}
 125  	return &out
 126  }
 127  
 128  // ReadWithOptions see Read, but allows overriding some parameters with
 129  // ReadOptions.
 130  //
 131  // If the message uses an unknown transfer encoding or charset, ReadWithOptions
 132  // returns an error that verifies IsUnknownCharset or IsUnknownEncoding, but
 133  // also returns an Entity that can be read.
 134  func ReadWithOptions(r io.Reader, opts *ReadOptions) (*Entity, error) {
 135  	opts = opts.withDefaults()
 136  
 137  	lr := &limitedReader{R: r, N: opts.MaxHeaderBytes}
 138  	br := bufio.NewReader(lr)
 139  
 140  	h, err := textproto.ReadHeader(br)
 141  	if err != nil {
 142  		return nil, err
 143  	}
 144  
 145  	lr.N = math.MaxInt64
 146  
 147  	return New(Header{h}, br)
 148  }
 149  
 150  // Read reads a message from r. The message's encoding and charset are
 151  // automatically decoded to raw UTF-8. Note that this function only reads the
 152  // message header.
 153  //
 154  // If the message uses an unknown transfer encoding or charset, Read returns an
 155  // error that verifies IsUnknownCharset or IsUnknownEncoding, but also returns
 156  // an Entity that can be read.
 157  func Read(r io.Reader) (*Entity, error) {
 158  	return ReadWithOptions(r, nil)
 159  }
 160  
 161  // MultipartReader returns a MultipartReader that reads parts from this entity's
 162  // body. If this entity is not multipart, it returns nil.
 163  func (e *Entity) MultipartReader() MultipartReader {
 164  	if !strings.HasPrefix(e.mediaType, "multipart/") {
 165  		return nil
 166  	}
 167  	if mb, ok := e.Body.(*multipartBody); ok {
 168  		return mb
 169  	}
 170  	return &multipartReader{textproto.NewMultipartReader(e.Body, e.mediaParams["boundary"])}
 171  }
 172  
 173  // writeBodyTo writes this entity's body to w (without the header).
 174  func (e *Entity) writeBodyTo(w *Writer) error {
 175  	var err error
 176  	if mb, ok := e.Body.(*multipartBody); ok {
 177  		err = mb.writeBodyTo(w)
 178  	} else {
 179  		_, err = io.Copy(w, e.Body)
 180  	}
 181  	return err
 182  }
 183  
 184  // WriteTo writes this entity's header and body to w.
 185  func (e *Entity) WriteTo(w io.Writer) error {
 186  	ew, err := CreateWriter(w, e.Header)
 187  	if err != nil {
 188  		return err
 189  	}
 190  
 191  	if err := e.writeBodyTo(ew); err != nil {
 192  		ew.Close()
 193  		return err
 194  	}
 195  
 196  	return ew.Close()
 197  }
 198  
// WalkFunc is the type of the function called for each part visited by Walk.
//
// The path argument is the list of multipart indices leading to the part.
// The root part has a nil path.
//
// If an encoding or charset error occurred while walking to a part, it is
// passed in as err so that the function can decide how to handle it.
//
// Unlike IMAP part paths, indices start from 0 (instead of 1) and a
// non-multipart message has a nil path (instead of {1}).
//
// If the function returns a non-nil error, processing stops and Walk returns
// that error.
type WalkFunc func(path []int, entity *Entity, err error) error
 212  
 213  // Walk walks the entity's multipart tree, calling walkFunc for each part in
 214  // the tree, including the root entity.
 215  //
 216  // Walk consumes the entity.
 217  func (e *Entity) Walk(walkFunc WalkFunc) error {
 218  	var multipartReaders []MultipartReader
 219  	var path []int
 220  	part := e
 221  	for {
 222  		var err error
 223  		if part == nil {
 224  			if len(multipartReaders) == 0 {
 225  				break
 226  			}
 227  
 228  			// Get the next part from the last multipart reader
 229  			mr := multipartReaders[len(multipartReaders)-1]
 230  			part, err = mr.NextPart()
 231  			if err == io.EOF {
 232  				multipartReaders = multipartReaders[:len(multipartReaders)-1]
 233  				path = path[:len(path)-1]
 234  				continue
 235  			} else if IsUnknownEncoding(err) || IsUnknownCharset(err) {
 236  				// Forward the error to walkFunc
 237  			} else if err != nil {
 238  				return err
 239  			}
 240  
 241  			path[len(path)-1]++
 242  		}
 243  
 244  		// Copy the path since we'll mutate it on the next iteration
 245  		var pathCopy []int
 246  		if len(path) > 0 {
 247  			pathCopy = make([]int, len(path))
 248  			copy(pathCopy, path)
 249  		}
 250  
 251  		if err := walkFunc(pathCopy, part, err); err != nil {
 252  			return err
 253  		}
 254  
 255  		if mr := part.MultipartReader(); mr != nil {
 256  			multipartReaders = append(multipartReaders, mr)
 257  			path = append(path, -1)
 258  		}
 259  
 260  		part = nil
 261  	}
 262  
 263  	return nil
 264  }
 265