encode.go raw

   1  // Copyright 2019 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package proto
   6  
   7  import (
   8  	"errors"
   9  	"fmt"
  10  
  11  	"google.golang.org/protobuf/encoding/protowire"
  12  	"google.golang.org/protobuf/internal/encoding/messageset"
  13  	"google.golang.org/protobuf/internal/order"
  14  	"google.golang.org/protobuf/internal/pragma"
  15  	"google.golang.org/protobuf/reflect/protoreflect"
  16  	"google.golang.org/protobuf/runtime/protoiface"
  17  
  18  	protoerrors "google.golang.org/protobuf/internal/errors"
  19  )
  20  
  21  // MarshalOptions configures the marshaler.
  22  //
  23  // Example usage:
  24  //
  25  //	b, err := MarshalOptions{Deterministic: true}.Marshal(m)
  26  type MarshalOptions struct {
  27  	pragma.NoUnkeyedLiterals
  28  
  29  	// AllowPartial allows messages that have missing required fields to marshal
  30  	// without returning an error. If AllowPartial is false (the default),
  31  	// Marshal will return an error if there are any missing required fields.
  32  	AllowPartial bool
  33  
  34  	// Deterministic controls whether the same message will always be
  35  	// serialized to the same bytes within the same binary.
  36  	//
  37  	// Setting this option guarantees that repeated serialization of
  38  	// the same message will return the same bytes, and that different
  39  	// processes of the same binary (which may be executing on different
  40  	// machines) will serialize equal messages to the same bytes.
  41  	// It has no effect on the resulting size of the encoded message compared
  42  	// to a non-deterministic marshal.
  43  	//
  44  	// Note that the deterministic serialization is NOT canonical across
  45  	// languages. It is not guaranteed to remain stable over time. It is
  46  	// unstable across different builds with schema changes due to unknown
  47  	// fields. Users who need canonical serialization (e.g., persistent
  48  	// storage in a canonical form, fingerprinting, etc.) must define
  49  	// their own canonicalization specification and implement their own
  50  	// serializer rather than relying on this API.
  51  	//
  52  	// If deterministic serialization is requested, map entries will be
  53  	// sorted by keys in lexographical order. This is an implementation
  54  	// detail and subject to change.
  55  	Deterministic bool
  56  
  57  	// UseCachedSize indicates that the result of a previous Size call
  58  	// may be reused.
  59  	//
  60  	// Setting this option asserts that:
  61  	//
  62  	// 1. Size has previously been called on this message with identical
  63  	// options (except for UseCachedSize itself).
  64  	//
  65  	// 2. The message and all its submessages have not changed in any
  66  	// way since the Size call. For lazily decoded messages, accessing
  67  	// a message results in decoding the message, which is a change.
  68  	//
  69  	// If either of these invariants is violated,
  70  	// the results are undefined and may include panics or corrupted output.
  71  	//
  72  	// Implementations MAY take this option into account to provide
  73  	// better performance, but there is no guarantee that they will do so.
  74  	// There is absolutely no guarantee that Size followed by Marshal with
  75  	// UseCachedSize set will perform equivalently to Marshal alone.
  76  	UseCachedSize bool
  77  }
  78  
  79  // flags turns the specified MarshalOptions (user-facing) into
  80  // protoiface.MarshalInputFlags (used internally by the marshaler).
  81  //
  82  // See impl.marshalOptions.Options for the inverse operation.
  83  func (o MarshalOptions) flags() protoiface.MarshalInputFlags {
  84  	var flags protoiface.MarshalInputFlags
  85  
  86  	// Note: o.AllowPartial is always forced to true by MarshalOptions.marshal,
  87  	// which is why it is not a part of MarshalInputFlags.
  88  
  89  	if o.Deterministic {
  90  		flags |= protoiface.MarshalDeterministic
  91  	}
  92  
  93  	if o.UseCachedSize {
  94  		flags |= protoiface.MarshalUseCachedSize
  95  	}
  96  
  97  	return flags
  98  }
  99  
 100  // Marshal returns the wire-format encoding of m.
 101  //
 102  // This is the most common entry point for encoding a Protobuf message.
 103  //
 104  // See the [MarshalOptions] type if you need more control.
 105  func Marshal(m Message) ([]byte, error) {
 106  	// Treat nil message interface as an empty message; nothing to output.
 107  	if m == nil {
 108  		return nil, nil
 109  	}
 110  
 111  	out, err := MarshalOptions{}.marshal(nil, m.ProtoReflect())
 112  	if len(out.Buf) == 0 && err == nil {
 113  		out.Buf = emptyBytesForMessage(m)
 114  	}
 115  	return out.Buf, err
 116  }
 117  
 118  // Marshal returns the wire-format encoding of m.
 119  func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
 120  	// Treat nil message interface as an empty message; nothing to output.
 121  	if m == nil {
 122  		return nil, nil
 123  	}
 124  
 125  	out, err := o.marshal(nil, m.ProtoReflect())
 126  	if len(out.Buf) == 0 && err == nil {
 127  		out.Buf = emptyBytesForMessage(m)
 128  	}
 129  	return out.Buf, err
 130  }
 131  
 132  // emptyBytesForMessage returns a nil buffer if and only if m is invalid,
 133  // otherwise it returns a non-nil empty buffer.
 134  //
 135  // This is to assist the edge-case where user-code does the following:
 136  //
 137  //	m1.OptionalBytes, _ = proto.Marshal(m2)
 138  //
 139  // where they expect the proto2 "optional_bytes" field to be populated
 140  // if any only if m2 is a valid message.
 141  func emptyBytesForMessage(m Message) []byte {
 142  	if m == nil || !m.ProtoReflect().IsValid() {
 143  		return nil
 144  	}
 145  	return emptyBuf[:]
 146  }
 147  
 148  // MarshalAppend appends the wire-format encoding of m to b,
 149  // returning the result.
 150  //
 151  // This is a less common entry point than [Marshal], which is only needed if you
 152  // need to supply your own buffers for performance reasons.
 153  func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
 154  	// Treat nil message interface as an empty message; nothing to append.
 155  	if m == nil {
 156  		return b, nil
 157  	}
 158  
 159  	out, err := o.marshal(b, m.ProtoReflect())
 160  	return out.Buf, err
 161  }
 162  
 163  // MarshalState returns the wire-format encoding of a message.
 164  //
 165  // This method permits fine-grained control over the marshaler.
 166  // Most users should use [Marshal] instead.
 167  func (o MarshalOptions) MarshalState(in protoiface.MarshalInput) (protoiface.MarshalOutput, error) {
 168  	return o.marshal(in.Buf, in.Message)
 169  }
 170  
 171  // marshal is a centralized function that all marshal operations go through.
 172  // For profiling purposes, avoid changing the name of this function or
 173  // introducing other code paths for marshal that do not go through this.
 174  func (o MarshalOptions) marshal(b []byte, m protoreflect.Message) (out protoiface.MarshalOutput, err error) {
 175  	allowPartial := o.AllowPartial
 176  	o.AllowPartial = true
 177  	if methods := protoMethods(m); methods != nil && methods.Marshal != nil &&
 178  		!(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) {
 179  		in := protoiface.MarshalInput{
 180  			Message: m,
 181  			Buf:     b,
 182  			Flags:   o.flags(),
 183  		}
 184  		if methods.Size != nil {
 185  			sout := methods.Size(protoiface.SizeInput{
 186  				Message: m,
 187  				Flags:   in.Flags,
 188  			})
 189  			if cap(b) < len(b)+sout.Size {
 190  				in.Buf = make([]byte, len(b), growcap(cap(b), len(b)+sout.Size))
 191  				copy(in.Buf, b)
 192  			}
 193  			in.Flags |= protoiface.MarshalUseCachedSize
 194  		}
 195  		out, err = methods.Marshal(in)
 196  	} else {
 197  		out.Buf, err = o.marshalMessageSlow(b, m)
 198  	}
 199  	if err != nil {
 200  		var mismatch *protoerrors.SizeMismatchError
 201  		if errors.As(err, &mismatch) {
 202  			return out, fmt.Errorf("marshaling %s: %v", string(m.Descriptor().FullName()), err)
 203  		}
 204  		return out, err
 205  	}
 206  	if allowPartial {
 207  		return out, nil
 208  	}
 209  	return out, checkInitialized(m)
 210  }
 211  
 212  func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
 213  	out, err := o.marshal(b, m)
 214  	return out.Buf, err
 215  }
 216  
 217  // growcap scales up the capacity of a slice.
 218  //
 219  // Given a slice with a current capacity of oldcap and a desired
 220  // capacity of wantcap, growcap returns a new capacity >= wantcap.
 221  //
 222  // The algorithm is mostly identical to the one used by append as of Go 1.14.
 223  func growcap(oldcap, wantcap int) (newcap int) {
 224  	if wantcap > oldcap*2 {
 225  		newcap = wantcap
 226  	} else if oldcap < 1024 {
 227  		// The Go 1.14 runtime takes this case when len(s) < 1024,
 228  		// not when cap(s) < 1024. The difference doesn't seem
 229  		// significant here.
 230  		newcap = oldcap * 2
 231  	} else {
 232  		newcap = oldcap
 233  		for 0 < newcap && newcap < wantcap {
 234  			newcap += newcap / 4
 235  		}
 236  		if newcap <= 0 {
 237  			newcap = wantcap
 238  		}
 239  	}
 240  	return newcap
 241  }
 242  
 243  func (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) {
 244  	if messageset.IsMessageSet(m.Descriptor()) {
 245  		return o.marshalMessageSet(b, m)
 246  	}
 247  	fieldOrder := order.AnyFieldOrder
 248  	if o.Deterministic {
 249  		// TODO: This should use a more natural ordering like NumberFieldOrder,
 250  		// but doing so breaks golden tests that make invalid assumption about
 251  		// output stability of this implementation.
 252  		fieldOrder = order.LegacyFieldOrder
 253  	}
 254  	var err error
 255  	order.RangeFields(m, fieldOrder, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool {
 256  		b, err = o.marshalField(b, fd, v)
 257  		return err == nil
 258  	})
 259  	if err != nil {
 260  		return b, err
 261  	}
 262  	b = append(b, m.GetUnknown()...)
 263  	return b, nil
 264  }
 265  
 266  func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
 267  	switch {
 268  	case fd.IsList():
 269  		return o.marshalList(b, fd, value.List())
 270  	case fd.IsMap():
 271  		return o.marshalMap(b, fd, value.Map())
 272  	default:
 273  		b = protowire.AppendTag(b, fd.Number(), wireTypes[fd.Kind()])
 274  		return o.marshalSingular(b, fd, value)
 275  	}
 276  }
 277  
 278  func (o MarshalOptions) marshalList(b []byte, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
 279  	if fd.IsPacked() && list.Len() > 0 {
 280  		b = protowire.AppendTag(b, fd.Number(), protowire.BytesType)
 281  		b, pos := appendSpeculativeLength(b)
 282  		for i, llen := 0, list.Len(); i < llen; i++ {
 283  			var err error
 284  			b, err = o.marshalSingular(b, fd, list.Get(i))
 285  			if err != nil {
 286  				return b, err
 287  			}
 288  		}
 289  		b = finishSpeculativeLength(b, pos)
 290  		return b, nil
 291  	}
 292  
 293  	kind := fd.Kind()
 294  	for i, llen := 0, list.Len(); i < llen; i++ {
 295  		var err error
 296  		b = protowire.AppendTag(b, fd.Number(), wireTypes[kind])
 297  		b, err = o.marshalSingular(b, fd, list.Get(i))
 298  		if err != nil {
 299  			return b, err
 300  		}
 301  	}
 302  	return b, nil
 303  }
 304  
 305  func (o MarshalOptions) marshalMap(b []byte, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
 306  	keyf := fd.MapKey()
 307  	valf := fd.MapValue()
 308  	keyOrder := order.AnyKeyOrder
 309  	if o.Deterministic {
 310  		keyOrder = order.GenericKeyOrder
 311  	}
 312  	var err error
 313  	order.RangeEntries(mapv, keyOrder, func(key protoreflect.MapKey, value protoreflect.Value) bool {
 314  		b = protowire.AppendTag(b, fd.Number(), protowire.BytesType)
 315  		var pos int
 316  		b, pos = appendSpeculativeLength(b)
 317  
 318  		b, err = o.marshalField(b, keyf, key.Value())
 319  		if err != nil {
 320  			return false
 321  		}
 322  		b, err = o.marshalField(b, valf, value)
 323  		if err != nil {
 324  			return false
 325  		}
 326  		b = finishSpeculativeLength(b, pos)
 327  		return true
 328  	})
 329  	return b, err
 330  }
 331  
 332  // When encoding length-prefixed fields, we speculatively set aside some number of bytes
 333  // for the length, encode the data, and then encode the length (shifting the data if necessary
 334  // to make room).
 335  const speculativeLength = 1
 336  
 337  func appendSpeculativeLength(b []byte) ([]byte, int) {
 338  	pos := len(b)
 339  	b = append(b, "\x00\x00\x00\x00"[:speculativeLength]...)
 340  	return b, pos
 341  }
 342  
 343  func finishSpeculativeLength(b []byte, pos int) []byte {
 344  	mlen := len(b) - pos - speculativeLength
 345  	msiz := protowire.SizeVarint(uint64(mlen))
 346  	if msiz != speculativeLength {
 347  		for i := 0; i < msiz-speculativeLength; i++ {
 348  			b = append(b, 0)
 349  		}
 350  		copy(b[pos+msiz:], b[pos+speculativeLength:])
 351  		b = b[:pos+msiz+mlen]
 352  	}
 353  	protowire.AppendVarint(b[:pos], uint64(mlen))
 354  	return b
 355  }
 356