ascii85.mx raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package ascii85 implements the ascii85 data encoding
   6  // as used in the btoa tool and Adobe's PostScript and PDF document formats.
   7  package ascii85
   8  
   9  import (
  10  	"io"
  11  	"strconv"
  12  )
  13  
  14  /*
  15   * Encoder
  16   */
  17  
  18  // Encode encodes src into at most [MaxEncodedLen](len(src))
  19  // bytes of dst, returning the actual number of bytes written.
  20  //
  21  // The encoding handles 4-byte chunks, using a special encoding
  22  // for the last fragment, so Encode is not appropriate for use on
  23  // individual blocks of a large data stream. Use [NewEncoder] instead.
  24  //
  25  // Often, ascii85-encoded data is wrapped in <~ and ~> symbols.
  26  // Encode does not add these.
  27  func Encode(dst, src []byte) int {
  28  	if len(src) == 0 {
  29  		return 0
  30  	}
  31  
  32  	n := 0
  33  	for len(src) > 0 {
  34  		dst[0] = 0
  35  		dst[1] = 0
  36  		dst[2] = 0
  37  		dst[3] = 0
  38  		dst[4] = 0
  39  
  40  		// Unpack 4 bytes into uint32 to repack into base 85 5-byte.
  41  		var v uint32
  42  		switch len(src) {
  43  		default:
  44  			v |= uint32(src[3])
  45  			v |= uint32(src[2]) << 8
  46  			v |= uint32(src[1]) << 16
  47  			v |= uint32(src[0]) << 24
  48  		case 3:
  49  			v |= uint32(src[2]) << 8
  50  			v |= uint32(src[1]) << 16
  51  			v |= uint32(src[0]) << 24
  52  		case 2:
  53  			v |= uint32(src[1]) << 16
  54  			v |= uint32(src[0]) << 24
  55  		case 1:
  56  			v |= uint32(src[0]) << 24
  57  		}
  58  
  59  		// Special case: zero (!!!!!) shortens to z.
  60  		if v == 0 && len(src) >= 4 {
  61  			dst[0] = 'z'
  62  			dst = dst[1:]
  63  			src = src[4:]
  64  			n++
  65  			continue
  66  		}
  67  
  68  		// Otherwise, 5 base 85 digits starting at !.
  69  		for i := 4; i >= 0; i-- {
  70  			dst[i] = '!' + byte(v%85)
  71  			v /= 85
  72  		}
  73  
  74  		// If src was short, discard the low destination bytes.
  75  		m := 5
  76  		if len(src) < 4 {
  77  			m -= 4 - len(src)
  78  			src = nil
  79  		} else {
  80  			src = src[4:]
  81  		}
  82  		dst = dst[m:]
  83  		n += m
  84  	}
  85  	return n
  86  }
  87  
  88  // MaxEncodedLen returns the maximum length of an encoding of n source bytes.
  89  func MaxEncodedLen(n int) int { return (n + 3) / 4 * 5 }
  90  
  91  // NewEncoder returns a new ascii85 stream encoder. Data written to
  92  // the returned writer will be encoded and then written to w.
  93  // Ascii85 encodings operate in 32-bit blocks; when finished
  94  // writing, the caller must Close the returned encoder to flush any
  95  // trailing partial block.
  96  func NewEncoder(w io.Writer) io.WriteCloser { return &encoder{w: w} }
  97  
  98  type encoder struct {
  99  	err  error
 100  	w    io.Writer
 101  	buf  [4]byte    // buffered data waiting to be encoded
 102  	nbuf int        // number of bytes in buf
 103  	out  [1024]byte // output buffer
 104  }
 105  
 106  func (e *encoder) Write(p []byte) (n int, err error) {
 107  	if e.err != nil {
 108  		return 0, e.err
 109  	}
 110  
 111  	// Leading fringe.
 112  	if e.nbuf > 0 {
 113  		var i int
 114  		for i = 0; i < len(p) && e.nbuf < 4; i++ {
 115  			e.buf[e.nbuf] = p[i]
 116  			e.nbuf++
 117  		}
 118  		n += i
 119  		p = p[i:]
 120  		if e.nbuf < 4 {
 121  			return
 122  		}
 123  		nout := Encode(e.out[0:], e.buf[0:])
 124  		if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil {
 125  			return n, e.err
 126  		}
 127  		e.nbuf = 0
 128  	}
 129  
 130  	// Large interior chunks.
 131  	for len(p) >= 4 {
 132  		nn := len(e.out) / 5 * 4
 133  		if nn > len(p) {
 134  			nn = len(p)
 135  		}
 136  		nn -= nn % 4
 137  		if nn > 0 {
 138  			nout := Encode(e.out[0:], p[0:nn])
 139  			if _, e.err = e.w.Write(e.out[0:nout]); e.err != nil {
 140  				return n, e.err
 141  			}
 142  		}
 143  		n += nn
 144  		p = p[nn:]
 145  	}
 146  
 147  	// Trailing fringe.
 148  	copy(e.buf[:], p)
 149  	e.nbuf = len(p)
 150  	n += len(p)
 151  	return
 152  }
 153  
 154  // Close flushes any pending output from the encoder.
 155  // It is an error to call Write after calling Close.
 156  func (e *encoder) Close() error {
 157  	// If there's anything left in the buffer, flush it out
 158  	if e.err == nil && e.nbuf > 0 {
 159  		nout := Encode(e.out[0:], e.buf[0:e.nbuf])
 160  		e.nbuf = 0
 161  		_, e.err = e.w.Write(e.out[0:nout])
 162  	}
 163  	return e.err
 164  }
 165  
 166  /*
 167   * Decoder
 168   */
 169  
 170  type CorruptInputError int64
 171  
 172  func (e CorruptInputError) Error() string {
 173  	return "illegal ascii85 data at input byte " + strconv.FormatInt(int64(e), 10)
 174  }
 175  
 176  // Decode decodes src into dst, returning both the number
 177  // of bytes written to dst and the number consumed from src.
 178  // If src contains invalid ascii85 data, Decode will return the
 179  // number of bytes successfully written and a [CorruptInputError].
 180  // Decode ignores space and control characters in src.
 181  // Often, ascii85-encoded data is wrapped in <~ and ~> symbols.
 182  // Decode expects these to have been stripped by the caller.
 183  //
 184  // If flush is true, Decode assumes that src represents the
 185  // end of the input stream and processes it completely rather
 186  // than wait for the completion of another 32-bit block.
 187  //
 188  // [NewDecoder] wraps an [io.Reader] interface around Decode.
 189  func Decode(dst, src []byte, flush bool) (ndst, nsrc int, err error) {
 190  	var v uint32
 191  	var nb int
 192  	for i, b := range src {
 193  		if len(dst)-ndst < 4 {
 194  			return
 195  		}
 196  		switch {
 197  		case b <= ' ':
 198  			continue
 199  		case b == 'z' && nb == 0:
 200  			nb = 5
 201  			v = 0
 202  		case '!' <= b && b <= 'u':
 203  			v = v*85 + uint32(b-'!')
 204  			nb++
 205  		default:
 206  			return 0, 0, CorruptInputError(i)
 207  		}
 208  		if nb == 5 {
 209  			nsrc = i + 1
 210  			dst[ndst] = byte(v >> 24)
 211  			dst[ndst+1] = byte(v >> 16)
 212  			dst[ndst+2] = byte(v >> 8)
 213  			dst[ndst+3] = byte(v)
 214  			ndst += 4
 215  			nb = 0
 216  			v = 0
 217  		}
 218  	}
 219  	if flush {
 220  		nsrc = len(src)
 221  		if nb > 0 {
 222  			// The number of output bytes in the last fragment
 223  			// is the number of leftover input bytes - 1:
 224  			// the extra byte provides enough bits to cover
 225  			// the inefficiency of the encoding for the block.
 226  			if nb == 1 {
 227  				return 0, 0, CorruptInputError(len(src))
 228  			}
 229  			for i := nb; i < 5; i++ {
 230  				// The short encoding truncated the output value.
 231  				// We have to assume the worst case values (digit 84)
 232  				// in order to ensure that the top bits are correct.
 233  				v = v*85 + 84
 234  			}
 235  			for i := 0; i < nb-1; i++ {
 236  				dst[ndst] = byte(v >> 24)
 237  				v <<= 8
 238  				ndst++
 239  			}
 240  		}
 241  	}
 242  	return
 243  }
 244  
 245  // NewDecoder constructs a new ascii85 stream decoder.
 246  func NewDecoder(r io.Reader) io.Reader { return &decoder{r: r} }
 247  
 248  type decoder struct {
 249  	err     error
 250  	readErr error
 251  	r       io.Reader
 252  	buf     [1024]byte // leftover input
 253  	nbuf    int
 254  	out     []byte // leftover decoded output
 255  	outbuf  [1024]byte
 256  }
 257  
 258  func (d *decoder) Read(p []byte) (n int, err error) {
 259  	if len(p) == 0 {
 260  		return 0, nil
 261  	}
 262  	if d.err != nil {
 263  		return 0, d.err
 264  	}
 265  
 266  	for {
 267  		// Copy leftover output from last decode.
 268  		if len(d.out) > 0 {
 269  			n = copy(p, d.out)
 270  			d.out = d.out[n:]
 271  			return
 272  		}
 273  
 274  		// Decode leftover input from last read.
 275  		var nn, nsrc, ndst int
 276  		if d.nbuf > 0 {
 277  			ndst, nsrc, d.err = Decode(d.outbuf[0:], d.buf[0:d.nbuf], d.readErr != nil)
 278  			if ndst > 0 {
 279  				d.out = d.outbuf[0:ndst]
 280  				d.nbuf = copy(d.buf[0:], d.buf[nsrc:d.nbuf])
 281  				continue // copy out and return
 282  			}
 283  			if ndst == 0 && d.err == nil {
 284  				// Special case: input buffer is mostly filled with non-data bytes.
 285  				// Filter out such bytes to make room for more input.
 286  				off := 0
 287  				for i := 0; i < d.nbuf; i++ {
 288  					if d.buf[i] > ' ' {
 289  						d.buf[off] = d.buf[i]
 290  						off++
 291  					}
 292  				}
 293  				d.nbuf = off
 294  			}
 295  		}
 296  
 297  		// Out of input, out of decoded output. Check errors.
 298  		if d.err != nil {
 299  			return 0, d.err
 300  		}
 301  		if d.readErr != nil {
 302  			d.err = d.readErr
 303  			return 0, d.err
 304  		}
 305  
 306  		// Read more data.
 307  		nn, d.readErr = d.r.Read(d.buf[d.nbuf:])
 308  		d.nbuf += nn
 309  	}
 310  }
 311