encode.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package prototext
   6  
   7  import (
   8  	"fmt"
   9  	"strconv"
  10  	"unicode/utf8"
  11  
  12  	"google.golang.org/protobuf/encoding/protowire"
  13  	"google.golang.org/protobuf/internal/encoding/messageset"
  14  	"google.golang.org/protobuf/internal/encoding/text"
  15  	"google.golang.org/protobuf/internal/errors"
  16  	"google.golang.org/protobuf/internal/flags"
  17  	"google.golang.org/protobuf/internal/genid"
  18  	"google.golang.org/protobuf/internal/order"
  19  	"google.golang.org/protobuf/internal/pragma"
  20  	"google.golang.org/protobuf/internal/strs"
  21  	"google.golang.org/protobuf/proto"
  22  	"google.golang.org/protobuf/reflect/protoreflect"
  23  	"google.golang.org/protobuf/reflect/protoregistry"
  24  )
  25  
// defaultIndent is the indentation that marshal substitutes when
// MarshalOptions.Multiline is set but MarshalOptions.Indent is empty.
const defaultIndent = "  "
  27  
  28  // Format formats the message as a multiline string.
  29  // This function is only intended for human consumption and ignores errors.
  30  // Do not depend on the output being stable. Its output will change across
  31  // different builds of your program, even when using the same version of the
  32  // protobuf module.
  33  func Format(m proto.Message) string {
  34  	return MarshalOptions{Multiline: true}.Format(m)
  35  }
  36  
  37  // Marshal writes the given [proto.Message] in textproto format using default
  38  // options. Do not depend on the output being stable. Its output will change
  39  // across different builds of your program, even when using the same version of
  40  // the protobuf module.
  41  func Marshal(m proto.Message) ([]byte, error) {
  42  	return MarshalOptions{}.Marshal(m)
  43  }
  44  
// MarshalOptions is a configurable text format marshaler.
//
// The zero value is usable as-is; the package-level Marshal function uses it
// to marshal with default options.
type MarshalOptions struct {
	// NOTE(review): judging by its name, embedding this forces callers to use
	// keyed struct literals; confirm against package pragma.
	pragma.NoUnkeyedLiterals

	// Multiline specifies whether the marshaler should format the output in
	// indented-form with every textual element on a new line.
	// If Indent is an empty string, then an arbitrary indent is chosen.
	Multiline bool

	// Indent specifies the set of indentation characters to use in a multiline
	// formatted output such that every entry is preceded by Indent and
	// terminated by a newline. If non-empty, then Multiline is treated as true.
	// Indent can only be composed of space or tab characters.
	Indent string

	// EmitASCII specifies whether to format strings and bytes as ASCII only
	// as opposed to using UTF-8 encoding when possible.
	EmitASCII bool

	// allowInvalidUTF8 specifies whether to permit the encoding of strings
	// with invalid UTF-8. When false, string fields that enforce UTF-8 and
	// contain invalid UTF-8 cause marshaling to fail with an error.
	// This is unexported as it is intended to only be specified by the
	// Format method.
	allowInvalidUTF8 bool

	// AllowPartial allows messages that have missing required fields to marshal
	// without returning an error. If AllowPartial is false (the default),
	// Marshal will return error if there are any missing required fields.
	AllowPartial bool

	// EmitUnknown specifies whether to emit unknown fields in the output.
	// If specified, the unmarshaler may be unable to parse the output.
	// The default is to exclude unknown fields.
	EmitUnknown bool

	// Resolver is used for looking up types when expanding google.protobuf.Any
	// messages. If nil, this defaults to using protoregistry.GlobalTypes.
	// The same resolver is also passed to proto.UnmarshalOptions when the
	// payload of an Any is decoded for expansion.
	Resolver interface {
		protoregistry.ExtensionTypeResolver
		protoregistry.MessageTypeResolver
	}
}
  86  
  87  // Format formats the message as a string.
  88  // This method is only intended for human consumption and ignores errors.
  89  // Do not depend on the output being stable. Its output will change across
  90  // different builds of your program, even when using the same version of the
  91  // protobuf module.
  92  func (o MarshalOptions) Format(m proto.Message) string {
  93  	if m == nil || !m.ProtoReflect().IsValid() {
  94  		return "<nil>" // invalid syntax, but okay since this is for debugging
  95  	}
  96  	o.allowInvalidUTF8 = true
  97  	o.AllowPartial = true
  98  	o.EmitUnknown = true
  99  	b, _ := o.Marshal(m)
 100  	return string(b)
 101  }
 102  
 103  // Marshal writes the given [proto.Message] in textproto format using options in
 104  // MarshalOptions object. Do not depend on the output being stable. Its output
 105  // will change across different builds of your program, even when using the
 106  // same version of the protobuf module.
 107  func (o MarshalOptions) Marshal(m proto.Message) ([]byte, error) {
 108  	return o.marshal(nil, m)
 109  }
 110  
 111  // MarshalAppend appends the textproto format encoding of m to b,
 112  // returning the result.
 113  func (o MarshalOptions) MarshalAppend(b []byte, m proto.Message) ([]byte, error) {
 114  	return o.marshal(b, m)
 115  }
 116  
 117  // marshal is a centralized function that all marshal operations go through.
 118  // For profiling purposes, avoid changing the name of this function or
 119  // introducing other code paths for marshal that do not go through this.
 120  func (o MarshalOptions) marshal(b []byte, m proto.Message) ([]byte, error) {
 121  	var delims = [2]byte{'{', '}'}
 122  
 123  	if o.Multiline && o.Indent == "" {
 124  		o.Indent = defaultIndent
 125  	}
 126  	if o.Resolver == nil {
 127  		o.Resolver = protoregistry.GlobalTypes
 128  	}
 129  
 130  	internalEnc, err := text.NewEncoder(b, o.Indent, delims, o.EmitASCII)
 131  	if err != nil {
 132  		return nil, err
 133  	}
 134  
 135  	// Treat nil message interface as an empty message,
 136  	// in which case there is nothing to output.
 137  	if m == nil {
 138  		return b, nil
 139  	}
 140  
 141  	enc := encoder{internalEnc, o}
 142  	err = enc.marshalMessage(m.ProtoReflect(), false)
 143  	if err != nil {
 144  		return nil, err
 145  	}
 146  	out := enc.Bytes()
 147  	if len(o.Indent) > 0 && len(out) > 0 {
 148  		out = append(out, '\n')
 149  	}
 150  	if o.AllowPartial {
 151  		return out, nil
 152  	}
 153  	return out, proto.CheckInitialized(m)
 154  }
 155  
// encoder pairs the low-level text encoder with the options that govern a
// single marshal call. Its methods have value receivers; the embedded
// *text.Encoder pointer means every copy writes to the same underlying output.
type encoder struct {
	// Encoder is the low-level text writer; its methods (WriteName,
	// StartMessage, Bytes, ...) are promoted onto encoder.
	*text.Encoder
	// opts holds the marshal options after defaults have been filled in
	// by MarshalOptions.marshal.
	opts MarshalOptions
}
 160  
 161  // marshalMessage marshals the given protoreflect.Message.
 162  func (e encoder) marshalMessage(m protoreflect.Message, inclDelims bool) error {
 163  	messageDesc := m.Descriptor()
 164  	if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
 165  		return errors.New("no support for proto1 MessageSets")
 166  	}
 167  
 168  	if inclDelims {
 169  		e.StartMessage()
 170  		defer e.EndMessage()
 171  	}
 172  
 173  	// Handle Any expansion.
 174  	if messageDesc.FullName() == genid.Any_message_fullname {
 175  		if e.marshalAny(m) {
 176  			return nil
 177  		}
 178  		// If unable to expand, continue on to marshal Any as a regular message.
 179  	}
 180  
 181  	// Marshal fields.
 182  	var err error
 183  	order.RangeFields(m, order.IndexNameFieldOrder, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool {
 184  		if err = e.marshalField(fd.TextName(), v, fd); err != nil {
 185  			return false
 186  		}
 187  		return true
 188  	})
 189  	if err != nil {
 190  		return err
 191  	}
 192  
 193  	// Marshal unknown fields.
 194  	if e.opts.EmitUnknown {
 195  		e.marshalUnknown(m.GetUnknown())
 196  	}
 197  
 198  	return nil
 199  }
 200  
 201  // marshalField marshals the given field with protoreflect.Value.
 202  func (e encoder) marshalField(name string, val protoreflect.Value, fd protoreflect.FieldDescriptor) error {
 203  	switch {
 204  	case fd.IsList():
 205  		return e.marshalList(name, val.List(), fd)
 206  	case fd.IsMap():
 207  		return e.marshalMap(name, val.Map(), fd)
 208  	default:
 209  		e.WriteName(name)
 210  		return e.marshalSingular(val, fd)
 211  	}
 212  }
 213  
 214  // marshalSingular marshals the given non-repeated field value. This includes
 215  // all scalar types, enums, messages, and groups.
 216  func (e encoder) marshalSingular(val protoreflect.Value, fd protoreflect.FieldDescriptor) error {
 217  	kind := fd.Kind()
 218  	switch kind {
 219  	case protoreflect.BoolKind:
 220  		e.WriteBool(val.Bool())
 221  
 222  	case protoreflect.StringKind:
 223  		s := val.String()
 224  		if !e.opts.allowInvalidUTF8 && strs.EnforceUTF8(fd) && !utf8.ValidString(s) {
 225  			return errors.InvalidUTF8(string(fd.FullName()))
 226  		}
 227  		e.WriteString(s)
 228  
 229  	case protoreflect.Int32Kind, protoreflect.Int64Kind,
 230  		protoreflect.Sint32Kind, protoreflect.Sint64Kind,
 231  		protoreflect.Sfixed32Kind, protoreflect.Sfixed64Kind:
 232  		e.WriteInt(val.Int())
 233  
 234  	case protoreflect.Uint32Kind, protoreflect.Uint64Kind,
 235  		protoreflect.Fixed32Kind, protoreflect.Fixed64Kind:
 236  		e.WriteUint(val.Uint())
 237  
 238  	case protoreflect.FloatKind:
 239  		// Encoder.WriteFloat handles the special numbers NaN and infinites.
 240  		e.WriteFloat(val.Float(), 32)
 241  
 242  	case protoreflect.DoubleKind:
 243  		// Encoder.WriteFloat handles the special numbers NaN and infinites.
 244  		e.WriteFloat(val.Float(), 64)
 245  
 246  	case protoreflect.BytesKind:
 247  		e.WriteString(string(val.Bytes()))
 248  
 249  	case protoreflect.EnumKind:
 250  		num := val.Enum()
 251  		if desc := fd.Enum().Values().ByNumber(num); desc != nil {
 252  			e.WriteLiteral(string(desc.Name()))
 253  		} else {
 254  			// Use numeric value if there is no enum description.
 255  			e.WriteInt(int64(num))
 256  		}
 257  
 258  	case protoreflect.MessageKind, protoreflect.GroupKind:
 259  		return e.marshalMessage(val.Message(), true)
 260  
 261  	default:
 262  		panic(fmt.Sprintf("%v has unknown kind: %v", fd.FullName(), kind))
 263  	}
 264  	return nil
 265  }
 266  
 267  // marshalList marshals the given protoreflect.List as multiple name-value fields.
 268  func (e encoder) marshalList(name string, list protoreflect.List, fd protoreflect.FieldDescriptor) error {
 269  	size := list.Len()
 270  	for i := 0; i < size; i++ {
 271  		e.WriteName(name)
 272  		if err := e.marshalSingular(list.Get(i), fd); err != nil {
 273  			return err
 274  		}
 275  	}
 276  	return nil
 277  }
 278  
 279  // marshalMap marshals the given protoreflect.Map as multiple name-value fields.
 280  func (e encoder) marshalMap(name string, mmap protoreflect.Map, fd protoreflect.FieldDescriptor) error {
 281  	var err error
 282  	order.RangeEntries(mmap, order.GenericKeyOrder, func(key protoreflect.MapKey, val protoreflect.Value) bool {
 283  		e.WriteName(name)
 284  		e.StartMessage()
 285  		defer e.EndMessage()
 286  
 287  		e.WriteName(string(genid.MapEntry_Key_field_name))
 288  		err = e.marshalSingular(key.Value(), fd.MapKey())
 289  		if err != nil {
 290  			return false
 291  		}
 292  
 293  		e.WriteName(string(genid.MapEntry_Value_field_name))
 294  		err = e.marshalSingular(val, fd.MapValue())
 295  		if err != nil {
 296  			return false
 297  		}
 298  		return true
 299  	})
 300  	return err
 301  }
 302  
 303  // marshalUnknown parses the given []byte and marshals fields out.
 304  // This function assumes proper encoding in the given []byte.
 305  func (e encoder) marshalUnknown(b []byte) {
 306  	const dec = 10
 307  	const hex = 16
 308  	for len(b) > 0 {
 309  		num, wtype, n := protowire.ConsumeTag(b)
 310  		b = b[n:]
 311  		e.WriteName(strconv.FormatInt(int64(num), dec))
 312  
 313  		switch wtype {
 314  		case protowire.VarintType:
 315  			var v uint64
 316  			v, n = protowire.ConsumeVarint(b)
 317  			e.WriteUint(v)
 318  		case protowire.Fixed32Type:
 319  			var v uint32
 320  			v, n = protowire.ConsumeFixed32(b)
 321  			e.WriteLiteral("0x" + strconv.FormatUint(uint64(v), hex))
 322  		case protowire.Fixed64Type:
 323  			var v uint64
 324  			v, n = protowire.ConsumeFixed64(b)
 325  			e.WriteLiteral("0x" + strconv.FormatUint(v, hex))
 326  		case protowire.BytesType:
 327  			var v []byte
 328  			v, n = protowire.ConsumeBytes(b)
 329  			e.WriteString(string(v))
 330  		case protowire.StartGroupType:
 331  			e.StartMessage()
 332  			var v []byte
 333  			v, n = protowire.ConsumeGroup(num, b)
 334  			e.marshalUnknown(v)
 335  			e.EndMessage()
 336  		default:
 337  			panic(fmt.Sprintf("prototext: error parsing unknown field wire type: %v", wtype))
 338  		}
 339  
 340  		b = b[n:]
 341  	}
 342  }
 343  
 344  // marshalAny marshals the given google.protobuf.Any message in expanded form.
 345  // It returns true if it was able to marshal, else false.
 346  func (e encoder) marshalAny(any protoreflect.Message) bool {
 347  	// Construct the embedded message.
 348  	fds := any.Descriptor().Fields()
 349  	fdType := fds.ByNumber(genid.Any_TypeUrl_field_number)
 350  	typeURL := any.Get(fdType).String()
 351  	mt, err := e.opts.Resolver.FindMessageByURL(typeURL)
 352  	if err != nil {
 353  		return false
 354  	}
 355  	m := mt.New().Interface()
 356  
 357  	// Unmarshal bytes into embedded message.
 358  	fdValue := fds.ByNumber(genid.Any_Value_field_number)
 359  	value := any.Get(fdValue)
 360  	err = proto.UnmarshalOptions{
 361  		AllowPartial: true,
 362  		Resolver:     e.opts.Resolver,
 363  	}.Unmarshal(value.Bytes(), m)
 364  	if err != nil {
 365  		return false
 366  	}
 367  
 368  	// Get current encoder position. If marshaling fails, reset encoder output
 369  	// back to this position.
 370  	pos := e.Snapshot()
 371  
 372  	// Field name is the proto field name enclosed in [].
 373  	e.WriteName("[" + typeURL + "]")
 374  	err = e.marshalMessage(m.ProtoReflect(), true)
 375  	if err != nil {
 376  		e.Reset(pos)
 377  		return false
 378  	}
 379  	return true
 380  }
 381