encode.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package text
   6  
   7  import (
   8  	"math"
   9  	"math/bits"
  10  	"strconv"
  11  	"strings"
  12  	"unicode/utf8"
  13  
  14  	"google.golang.org/protobuf/internal/detrand"
  15  	"google.golang.org/protobuf/internal/errors"
  16  )
  17  
  18  // encType represents an encoding type.
  19  type encType uint8
  20  
  21  const (
  22  	_ encType = (1 << iota) / 2
  23  	name
  24  	scalar
  25  	messageOpen
  26  	messageClose
  27  )
  28  
  29  // Encoder provides methods to write out textproto constructs and values. The user is
  30  // responsible for producing valid sequences of constructs and values.
  31  type Encoder struct {
  32  	encoderState
  33  
  34  	indent      string
  35  	delims      [2]byte
  36  	outputASCII bool
  37  }
  38  
  39  type encoderState struct {
  40  	lastType encType
  41  	indents  []byte
  42  	out      []byte
  43  }
  44  
  45  // NewEncoder returns an Encoder.
  46  //
  47  // If indent is a non-empty string, it causes every entry in a List or Message
  48  // to be preceded by the indent and trailed by a newline.
  49  //
  50  // If delims is not the zero value, it controls the delimiter characters used
  51  // for messages (e.g., "{}" vs "<>").
  52  //
  53  // If outputASCII is true, strings will be serialized in such a way that
  54  // multi-byte UTF-8 sequences are escaped. This property ensures that the
  55  // overall output is ASCII (as opposed to UTF-8).
  56  func NewEncoder(buf []byte, indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
  57  	e := &Encoder{
  58  		encoderState: encoderState{out: buf},
  59  	}
  60  	if len(indent) > 0 {
  61  		if strings.Trim(indent, " \t") != "" {
  62  			return nil, errors.New("indent may only be composed of space and tab characters")
  63  		}
  64  		e.indent = indent
  65  	}
  66  	switch delims {
  67  	case [2]byte{0, 0}:
  68  		e.delims = [2]byte{'{', '}'}
  69  	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
  70  		e.delims = delims
  71  	default:
  72  		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
  73  	}
  74  	e.outputASCII = outputASCII
  75  
  76  	return e, nil
  77  }
  78  
  79  // Bytes returns the content of the written bytes.
  80  func (e *Encoder) Bytes() []byte {
  81  	return e.out
  82  }
  83  
  84  // StartMessage writes out the '{' or '<' symbol.
  85  func (e *Encoder) StartMessage() {
  86  	e.prepareNext(messageOpen)
  87  	e.out = append(e.out, e.delims[0])
  88  }
  89  
  90  // EndMessage writes out the '}' or '>' symbol.
  91  func (e *Encoder) EndMessage() {
  92  	e.prepareNext(messageClose)
  93  	e.out = append(e.out, e.delims[1])
  94  }
  95  
  96  // WriteName writes out the field name and the separator ':'.
  97  func (e *Encoder) WriteName(s string) {
  98  	e.prepareNext(name)
  99  	e.out = append(e.out, s...)
 100  	e.out = append(e.out, ':')
 101  }
 102  
 103  // WriteBool writes out the given boolean value.
 104  func (e *Encoder) WriteBool(b bool) {
 105  	if b {
 106  		e.WriteLiteral("true")
 107  	} else {
 108  		e.WriteLiteral("false")
 109  	}
 110  }
 111  
 112  // WriteString writes out the given string value.
 113  func (e *Encoder) WriteString(s string) {
 114  	e.prepareNext(scalar)
 115  	e.out = appendString(e.out, s, e.outputASCII)
 116  }
 117  
 118  func appendString(out []byte, in string, outputASCII bool) []byte {
 119  	out = append(out, '"')
 120  	i := indexNeedEscapeInString(in)
 121  	in, out = in[i:], append(out, in[:i]...)
 122  	for len(in) > 0 {
 123  		switch r, n := utf8.DecodeRuneInString(in); {
 124  		case r == utf8.RuneError && n == 1:
 125  			// We do not report invalid UTF-8 because strings in the text format
 126  			// are used to represent both the proto string and bytes type.
 127  			r = rune(in[0])
 128  			fallthrough
 129  		case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
 130  			out = append(out, '\\')
 131  			switch r {
 132  			case '"', '\\':
 133  				out = append(out, byte(r))
 134  			case '\n':
 135  				out = append(out, 'n')
 136  			case '\r':
 137  				out = append(out, 'r')
 138  			case '\t':
 139  				out = append(out, 't')
 140  			default:
 141  				out = append(out, 'x')
 142  				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
 143  				out = strconv.AppendUint(out, uint64(r), 16)
 144  			}
 145  			in = in[n:]
 146  		case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
 147  			out = append(out, '\\')
 148  			if r <= math.MaxUint16 {
 149  				out = append(out, 'u')
 150  				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
 151  				out = strconv.AppendUint(out, uint64(r), 16)
 152  			} else {
 153  				out = append(out, 'U')
 154  				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
 155  				out = strconv.AppendUint(out, uint64(r), 16)
 156  			}
 157  			in = in[n:]
 158  		default:
 159  			i := indexNeedEscapeInString(in[n:])
 160  			in, out = in[n+i:], append(out, in[:n+i]...)
 161  		}
 162  	}
 163  	out = append(out, '"')
 164  	return out
 165  }
 166  
 167  // indexNeedEscapeInString returns the index of the character that needs
 168  // escaping. If no characters need escaping, this returns the input length.
 169  func indexNeedEscapeInString(s string) int {
 170  	for i := 0; i < len(s); i++ {
 171  		if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= 0x7f {
 172  			return i
 173  		}
 174  	}
 175  	return len(s)
 176  }
 177  
 178  // WriteFloat writes out the given float value for given bitSize.
 179  func (e *Encoder) WriteFloat(n float64, bitSize int) {
 180  	e.prepareNext(scalar)
 181  	e.out = appendFloat(e.out, n, bitSize)
 182  }
 183  
 184  func appendFloat(out []byte, n float64, bitSize int) []byte {
 185  	switch {
 186  	case math.IsNaN(n):
 187  		return append(out, "nan"...)
 188  	case math.IsInf(n, +1):
 189  		return append(out, "inf"...)
 190  	case math.IsInf(n, -1):
 191  		return append(out, "-inf"...)
 192  	default:
 193  		return strconv.AppendFloat(out, n, 'g', -1, bitSize)
 194  	}
 195  }
 196  
 197  // WriteInt writes out the given signed integer value.
 198  func (e *Encoder) WriteInt(n int64) {
 199  	e.prepareNext(scalar)
 200  	e.out = strconv.AppendInt(e.out, n, 10)
 201  }
 202  
 203  // WriteUint writes out the given unsigned integer value.
 204  func (e *Encoder) WriteUint(n uint64) {
 205  	e.prepareNext(scalar)
 206  	e.out = strconv.AppendUint(e.out, n, 10)
 207  }
 208  
 209  // WriteLiteral writes out the given string as a literal value without quotes.
 210  // This is used for writing enum literal strings.
 211  func (e *Encoder) WriteLiteral(s string) {
 212  	e.prepareNext(scalar)
 213  	e.out = append(e.out, s...)
 214  }
 215  
 216  // prepareNext adds possible space and indentation for the next value based
 217  // on last encType and indent option. It also updates e.lastType to next.
 218  func (e *Encoder) prepareNext(next encType) {
 219  	defer func() {
 220  		e.lastType = next
 221  	}()
 222  
 223  	// Single line.
 224  	if len(e.indent) == 0 {
 225  		// Add space after each field before the next one.
 226  		if e.lastType&(scalar|messageClose) != 0 && next == name {
 227  			e.out = append(e.out, ' ')
 228  			// Add a random extra space to make output unstable.
 229  			if detrand.Bool() {
 230  				e.out = append(e.out, ' ')
 231  			}
 232  		}
 233  		return
 234  	}
 235  
 236  	// Multi-line.
 237  	switch {
 238  	case e.lastType == name:
 239  		e.out = append(e.out, ' ')
 240  		// Add a random extra space after name: to make output unstable.
 241  		if detrand.Bool() {
 242  			e.out = append(e.out, ' ')
 243  		}
 244  
 245  	case e.lastType == messageOpen && next != messageClose:
 246  		e.indents = append(e.indents, e.indent...)
 247  		e.out = append(e.out, '\n')
 248  		e.out = append(e.out, e.indents...)
 249  
 250  	case e.lastType&(scalar|messageClose) != 0:
 251  		if next == messageClose {
 252  			e.indents = e.indents[:len(e.indents)-len(e.indent)]
 253  		}
 254  		e.out = append(e.out, '\n')
 255  		e.out = append(e.out, e.indents...)
 256  	}
 257  }
 258  
 259  // Snapshot returns the current snapshot for use in Reset.
 260  func (e *Encoder) Snapshot() encoderState {
 261  	return e.encoderState
 262  }
 263  
 264  // Reset resets the Encoder to the given encoderState from a Snapshot.
 265  func (e *Encoder) Reset(es encoderState) {
 266  	e.encoderState = es
 267  }
 268  
 269  // AppendString appends the escaped form of the input string to b.
 270  func AppendString(b []byte, s string) []byte {
 271  	return appendString(b, s, false)
 272  }
 273