reader.mx raw
1 // Copyright 2012 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Package quotedprintable implements quoted-printable encoding as specified by
6 // RFC 2045.
7 package quotedprintable
8
9 import (
10 "bufio"
11 "bytes"
12 "fmt"
13 "io"
14 )
15
// Reader is a quoted-printable decoder.
type Reader struct {
	// br is the buffered source that encoded input is read from.
	br *bufio.Reader

	// rerr is the error produced by the most recent read from br, or a
	// decoding error found while splitting a line. Read reports it once
	// line has been fully consumed.
	rerr error

	// line holds input already taken from br that has not been decoded
	// yet; it is drained before br is read again.
	line []byte
}
22
23 // NewReader returns a quoted-printable reader, decoding from r.
24 func NewReader(r io.Reader) *Reader {
25 return &Reader{
26 br: bufio.NewReader(r),
27 }
28 }
29
// fromHex returns the value (0-15) of the ASCII hexadecimal digit b.
// Digits 0-9 and upper-case A-F are the expected forms; lower-case a-f is
// accepted too. Any other byte yields 0 and an error naming the byte.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return 10 + (b - 'A'), nil
	}
	if 'a' <= b && b <= 'f' {
		// Accept badly encoded (lower-case) digits.
		return 10 + (b - 'a'), nil
	}
	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
}
42
43 func readHexByte(v []byte) (b byte, err error) {
44 if len(v) < 2 {
45 return 0, io.ErrUnexpectedEOF
46 }
47 var hb, lb byte
48 if hb, err = fromHex(v[0]); err != nil {
49 return 0, err
50 }
51 if lb, err = fromHex(v[1]); err != nil {
52 return 0, err
53 }
54 return hb<<4 | lb, nil
55 }
56
// isQPDiscardWhitespace reports whether r is a space, tab, carriage return
// or line feed. Read uses it to strip such characters from the end of each
// input line.
func isQPDiscardWhitespace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}
64
// Byte sequences and cutsets consulted by Read while splitting input lines.
var (
	// crlf and lf are the line terminators recognised at the end of a line.
	crlf = []byte{'\r', '\n'}
	lf   = []byte{'\n'}

	// softSuffix is the '=' that marks a soft line break when it is the
	// last non-whitespace byte of a line.
	softSuffix = []byte{'='}

	// lwspChar is the cutset (space and tab) skipped between a trailing
	// '=' and the line terminator.
	lwspChar = " \t"
)
71
72 // Read reads and decodes quoted-printable data from the underlying reader.
73 func (r *Reader) Read(p []byte) (n int, err error) {
74 // Deviations from RFC 2045:
75 // 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
76 // 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
77 // with other broken QP encoders & decoders.
78 // 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
79 // the final byte read from the underlying reader is allowed to be '=',
80 // and it will be silently ignored.
81 // 4. it takes = as literal = if not followed by two hex digits
82 // but not at end of line (issue 13219).
83 for len(p) > 0 {
84 if len(r.line) == 0 {
85 if r.rerr != nil {
86 return n, r.rerr
87 }
88 r.line, r.rerr = r.br.ReadSlice('\n')
89
90 // Does the line end in CRLF instead of just LF?
91 hasLF := bytes.HasSuffix(r.line, lf)
92 hasCR := bytes.HasSuffix(r.line, crlf)
93 wholeLine := r.line
94 r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
95 if bytes.HasSuffix(r.line, softSuffix) {
96 rightStripped := bytes.TrimLeft(wholeLine[len(r.line):], lwspChar)
97 r.line = r.line[:len(r.line)-1]
98 if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
99 !(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
100 r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
101 }
102 } else if hasLF {
103 if hasCR {
104 r.line = append(r.line, '\r', '\n')
105 } else {
106 r.line = append(r.line, '\n')
107 }
108 }
109 continue
110 }
111 b := r.line[0]
112
113 switch {
114 case b == '=':
115 b, err = readHexByte(r.line[1:])
116 if err != nil {
117 if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
118 // Take the = as a literal =.
119 b = '='
120 break
121 }
122 return n, err
123 }
124 r.line = r.line[2:] // 2 of the 3; other 1 is done below
125 case b == '\t' || b == '\r' || b == '\n':
126 break
127 case b >= 0x80:
128 // As an extension to RFC 2045, we accept
129 // values >= 0x80 without complaint. Issue 22597.
130 break
131 case b < ' ' || b > '~':
132 return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
133 }
134 p[0] = b
135 p = p[1:]
136 r.line = r.line[1:]
137 n++
138 }
139 return n, nil
140 }
141