1 package message
2 3 import (
4 "bufio"
5 "errors"
6 "io"
7 "math"
8 "strings"
9 10 "github.com/emersion/go-message/textproto"
11 )
// An Entity is either a whole message or one of the parts in the body of a
// multipart entity.
type Entity struct {
	Header Header    // The entity's header.
	Body   io.Reader // The decoded entity's body.

	// mediaType and mediaParams hold the result of parsing the header's
	// Content-Type; New fills them in from header.ContentType().
	mediaType   string
	mediaParams map[string]string
}
22 23 // New makes a new message with the provided header and body. The entity's
24 // transfer encoding and charset are automatically decoded to UTF-8.
25 //
26 // If the message uses an unknown transfer encoding or charset, New returns an
27 // error that verifies IsUnknownCharset, but also returns an Entity that can
28 // be read.
29 func New(header Header, body io.Reader) (*Entity, error) {
30 var err error
31 32 mediaType, mediaParams, _ := header.ContentType()
33 34 // QUIRK: RFC 2045 section 6.4 specifies that multipart messages can't have
35 // a Content-Transfer-Encoding other than "7bit", "8bit" or "binary".
36 // However some messages in the wild are non-conformant and have it set to
37 // e.g. "quoted-printable". So we just ignore it for multipart.
38 // See https://github.com/emersion/go-message/issues/48
39 if !strings.HasPrefix(mediaType, "multipart/") {
40 enc := header.Get("Content-Transfer-Encoding")
41 if decoded, encErr := encodingReader(enc, body); encErr != nil {
42 err = UnknownEncodingError{encErr}
43 } else {
44 body = decoded
45 }
46 }
47 48 // RFC 2046 section 4.1.2: charset only applies to text/*
49 if strings.HasPrefix(mediaType, "text/") {
50 if ch, ok := mediaParams["charset"]; ok {
51 if converted, charsetErr := charsetReader(ch, body); charsetErr != nil {
52 err = UnknownCharsetError{charsetErr}
53 } else {
54 body = converted
55 }
56 }
57 }
58 59 return &Entity{
60 Header: header,
61 Body: body,
62 mediaType: mediaType,
63 mediaParams: mediaParams,
64 }, err
65 }
66 67 // NewMultipart makes a new multipart message with the provided header and
68 // parts. The Content-Type header must begin with "multipart/".
69 //
70 // If the message uses an unknown transfer encoding, NewMultipart returns an
71 // error that verifies IsUnknownCharset, but also returns an Entity that can
72 // be read.
73 func NewMultipart(header Header, parts []*Entity) (*Entity, error) {
74 r := &multipartBody{
75 header: header,
76 parts: parts,
77 }
78 79 return New(header, r)
80 }
// defaultMaxHeaderBytes is the header size limit that ReadOptions.withDefaults
// substitutes when MaxHeaderBytes is left at 0.
const defaultMaxHeaderBytes = 1 << 20 // 1 MB
// errHeaderTooBig is returned by limitedReader once its byte budget is used up.
var errHeaderTooBig = errors.New("message: header exceeds maximum size")

// limitedReader mirrors io.LimitedReader: it reads from R while counting down
// the N bytes that may still be read. The difference is that an exhausted
// budget is reported as errHeaderTooBig.
type limitedReader struct {
	R io.Reader // underlying reader
	N int64     // bytes that may still be read
}

// Read reads at most N bytes from the underlying reader into p and subtracts
// the number of bytes actually read from N. Once N is zero or negative, every
// call fails with errHeaderTooBig without touching the underlying reader.
func (lr *limitedReader) Read(p []byte) (int, error) {
	budget := lr.N
	if budget <= 0 {
		return 0, errHeaderTooBig
	}

	// Never ask the underlying reader for more than the remaining budget.
	if budget < int64(len(p)) {
		p = p[:budget]
	}

	n, err := lr.R.Read(p)
	lr.N -= int64(n)
	return n, err
}
103 104 // ReadOptions are options for ReadWithOptions.
105 type ReadOptions struct {
106 // MaxHeaderBytes limits the maximum permissible size of a message header
107 // block. If exceeded, an error will be returned.
108 //
109 // Set to -1 for no limit, set to 0 for the default value (1MB).
110 MaxHeaderBytes int64
111 }
112 113 // withDefaults returns a sanitised version of the options with defaults/special
114 // values accounted for.
115 func (o *ReadOptions) withDefaults() *ReadOptions {
116 var out ReadOptions
117 if o != nil {
118 out = *o
119 }
120 if out.MaxHeaderBytes == 0 {
121 out.MaxHeaderBytes = defaultMaxHeaderBytes
122 } else if out.MaxHeaderBytes < 0 {
123 out.MaxHeaderBytes = math.MaxInt64
124 }
125 return &out
126 }
127 128 // ReadWithOptions see Read, but allows overriding some parameters with
129 // ReadOptions.
130 //
131 // If the message uses an unknown transfer encoding or charset, ReadWithOptions
132 // returns an error that verifies IsUnknownCharset or IsUnknownEncoding, but
133 // also returns an Entity that can be read.
134 func ReadWithOptions(r io.Reader, opts *ReadOptions) (*Entity, error) {
135 opts = opts.withDefaults()
136 137 lr := &limitedReader{R: r, N: opts.MaxHeaderBytes}
138 br := bufio.NewReader(lr)
139 140 h, err := textproto.ReadHeader(br)
141 if err != nil {
142 return nil, err
143 }
144 145 lr.N = math.MaxInt64
146 147 return New(Header{h}, br)
148 }
149 150 // Read reads a message from r. The message's encoding and charset are
151 // automatically decoded to raw UTF-8. Note that this function only reads the
152 // message header.
153 //
154 // If the message uses an unknown transfer encoding or charset, Read returns an
155 // error that verifies IsUnknownCharset or IsUnknownEncoding, but also returns
156 // an Entity that can be read.
157 func Read(r io.Reader) (*Entity, error) {
158 return ReadWithOptions(r, nil)
159 }
160 161 // MultipartReader returns a MultipartReader that reads parts from this entity's
162 // body. If this entity is not multipart, it returns nil.
163 func (e *Entity) MultipartReader() MultipartReader {
164 if !strings.HasPrefix(e.mediaType, "multipart/") {
165 return nil
166 }
167 if mb, ok := e.Body.(*multipartBody); ok {
168 return mb
169 }
170 return &multipartReader{textproto.NewMultipartReader(e.Body, e.mediaParams["boundary"])}
171 }
172 173 // writeBodyTo writes this entity's body to w (without the header).
174 func (e *Entity) writeBodyTo(w *Writer) error {
175 var err error
176 if mb, ok := e.Body.(*multipartBody); ok {
177 err = mb.writeBodyTo(w)
178 } else {
179 _, err = io.Copy(w, e.Body)
180 }
181 return err
182 }
183 184 // WriteTo writes this entity's header and body to w.
185 func (e *Entity) WriteTo(w io.Writer) error {
186 ew, err := CreateWriter(w, e.Header)
187 if err != nil {
188 return err
189 }
190 191 if err := e.writeBodyTo(ew); err != nil {
192 ew.Close()
193 return err
194 }
195 196 return ew.Close()
197 }
// WalkFunc is the type of the function called for each part visited by Walk.
//
// The path argument is a list of multipart indices leading to the part. The
// root part has a nil path.
//
// If there was an encoding error walking to a part, the incoming error will
// describe the problem and the function can decide how to handle that error.
// Walk only forwards errors that satisfy IsUnknownEncoding or
// IsUnknownCharset; any other error stops the walk without calling the
// function.
//
// Unlike IMAP part paths, indices start from 0 (instead of 1) and a
// non-multipart message has a nil path (instead of {1}).
//
// If an error is returned, processing stops.
type WalkFunc func(path []int, entity *Entity, err error) error
212 213 // Walk walks the entity's multipart tree, calling walkFunc for each part in
214 // the tree, including the root entity.
215 //
216 // Walk consumes the entity.
217 func (e *Entity) Walk(walkFunc WalkFunc) error {
218 var multipartReaders []MultipartReader
219 var path []int
220 part := e
221 for {
222 var err error
223 if part == nil {
224 if len(multipartReaders) == 0 {
225 break
226 }
227 228 // Get the next part from the last multipart reader
229 mr := multipartReaders[len(multipartReaders)-1]
230 part, err = mr.NextPart()
231 if err == io.EOF {
232 multipartReaders = multipartReaders[:len(multipartReaders)-1]
233 path = path[:len(path)-1]
234 continue
235 } else if IsUnknownEncoding(err) || IsUnknownCharset(err) {
236 // Forward the error to walkFunc
237 } else if err != nil {
238 return err
239 }
240 241 path[len(path)-1]++
242 }
243 244 // Copy the path since we'll mutate it on the next iteration
245 var pathCopy []int
246 if len(path) > 0 {
247 pathCopy = make([]int, len(path))
248 copy(pathCopy, path)
249 }
250 251 if err := walkFunc(pathCopy, part, err); err != nil {
252 return err
253 }
254 255 if mr := part.MultipartReader(); mr != nil {
256 multipartReaders = append(multipartReaders, mr)
257 path = append(path, -1)
258 }
259 260 part = nil
261 }
262 263 return nil
264 }
265