reader.mx raw

   1  // Copyright 2012 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package quotedprintable implements quoted-printable encoding as specified by
   6  // RFC 2045.
   7  package quotedprintable
   8  
   9  import (
  10  	"bufio"
  11  	"bytes"
  12  	"fmt"
  13  	"io"
  14  )
  15  
  16  // Reader is a quoted-printable decoder.
  17  type Reader struct {
  18  	br   *bufio.Reader
  19  	rerr error  // last read error
  20  	line []byte // to be consumed before more of br
  21  }
  22  
  23  // NewReader returns a quoted-printable reader, decoding from r.
  24  func NewReader(r io.Reader) *Reader {
  25  	return &Reader{
  26  		br: bufio.NewReader(r),
  27  	}
  28  }
  29  
  30  func fromHex(b byte) (byte, error) {
  31  	switch {
  32  	case b >= '0' && b <= '9':
  33  		return b - '0', nil
  34  	case b >= 'A' && b <= 'F':
  35  		return b - 'A' + 10, nil
  36  	// Accept badly encoded bytes.
  37  	case b >= 'a' && b <= 'f':
  38  		return b - 'a' + 10, nil
  39  	}
  40  	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
  41  }
  42  
  43  func readHexByte(v []byte) (b byte, err error) {
  44  	if len(v) < 2 {
  45  		return 0, io.ErrUnexpectedEOF
  46  	}
  47  	var hb, lb byte
  48  	if hb, err = fromHex(v[0]); err != nil {
  49  		return 0, err
  50  	}
  51  	if lb, err = fromHex(v[1]); err != nil {
  52  		return 0, err
  53  	}
  54  	return hb<<4 | lb, nil
  55  }
  56  
  57  func isQPDiscardWhitespace(r rune) bool {
  58  	switch r {
  59  	case '\n', '\r', ' ', '\t':
  60  		return true
  61  	}
  62  	return false
  63  }
  64  
  65  var (
  66  	crlf       = []byte("\r\n")
  67  	lf         = []byte("\n")
  68  	softSuffix = []byte("=")
  69  	lwspChar   = " \t"
  70  )
  71  
  72  // Read reads and decodes quoted-printable data from the underlying reader.
  73  func (r *Reader) Read(p []byte) (n int, err error) {
  74  	// Deviations from RFC 2045:
  75  	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
  76  	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
  77  	//    with other broken QP encoders & decoders.
  78  	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
  79  	//    the final byte read from the underlying reader is allowed to be '=',
  80  	//    and it will be silently ignored.
  81  	// 4. it takes = as literal = if not followed by two hex digits
  82  	//    but not at end of line (issue 13219).
  83  	for len(p) > 0 {
  84  		if len(r.line) == 0 {
  85  			if r.rerr != nil {
  86  				return n, r.rerr
  87  			}
  88  			r.line, r.rerr = r.br.ReadSlice('\n')
  89  
  90  			// Does the line end in CRLF instead of just LF?
  91  			hasLF := bytes.HasSuffix(r.line, lf)
  92  			hasCR := bytes.HasSuffix(r.line, crlf)
  93  			wholeLine := r.line
  94  			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
  95  			if bytes.HasSuffix(r.line, softSuffix) {
  96  				rightStripped := bytes.TrimLeft(wholeLine[len(r.line):], lwspChar)
  97  				r.line = r.line[:len(r.line)-1]
  98  				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
  99  					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
 100  					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
 101  				}
 102  			} else if hasLF {
 103  				if hasCR {
 104  					r.line = append(r.line, '\r', '\n')
 105  				} else {
 106  					r.line = append(r.line, '\n')
 107  				}
 108  			}
 109  			continue
 110  		}
 111  		b := r.line[0]
 112  
 113  		switch {
 114  		case b == '=':
 115  			b, err = readHexByte(r.line[1:])
 116  			if err != nil {
 117  				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
 118  					// Take the = as a literal =.
 119  					b = '='
 120  					break
 121  				}
 122  				return n, err
 123  			}
 124  			r.line = r.line[2:] // 2 of the 3; other 1 is done below
 125  		case b == '\t' || b == '\r' || b == '\n':
 126  			break
 127  		case b >= 0x80:
 128  			// As an extension to RFC 2045, we accept
 129  			// values >= 0x80 without complaint. Issue 22597.
 130  			break
 131  		case b < ' ' || b > '~':
 132  			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
 133  		}
 134  		p[0] = b
 135  		p = p[1:]
 136  		r.line = r.line[1:]
 137  		n++
 138  	}
 139  	return n, nil
 140  }
 141