text_decode.go raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package proto
   6  
   7  import (
   8  	"encoding"
   9  	"errors"
  10  	"fmt"
  11  	"reflect"
  12  	"strconv"
  13  	"strings"
  14  	"unicode/utf8"
  15  
  16  	"google.golang.org/protobuf/encoding/prototext"
  17  	protoV2 "google.golang.org/protobuf/proto"
  18  	"google.golang.org/protobuf/reflect/protoreflect"
  19  	"google.golang.org/protobuf/reflect/protoregistry"
  20  )
  21  
  22  const wrapTextUnmarshalV2 = false
  23  
  24  // ParseError is returned by UnmarshalText.
  25  type ParseError struct {
  26  	Message string
  27  
  28  	// Deprecated: Do not use.
  29  	Line, Offset int
  30  }
  31  
  32  func (e *ParseError) Error() string {
  33  	if wrapTextUnmarshalV2 {
  34  		return e.Message
  35  	}
  36  	if e.Line == 1 {
  37  		return fmt.Sprintf("line 1.%d: %v", e.Offset, e.Message)
  38  	}
  39  	return fmt.Sprintf("line %d: %v", e.Line, e.Message)
  40  }
  41  
  42  // UnmarshalText parses a proto text formatted string into m.
  43  func UnmarshalText(s string, m Message) error {
  44  	if u, ok := m.(encoding.TextUnmarshaler); ok {
  45  		return u.UnmarshalText([]byte(s))
  46  	}
  47  
  48  	m.Reset()
  49  	mi := MessageV2(m)
  50  
  51  	if wrapTextUnmarshalV2 {
  52  		err := prototext.UnmarshalOptions{
  53  			AllowPartial: true,
  54  		}.Unmarshal([]byte(s), mi)
  55  		if err != nil {
  56  			return &ParseError{Message: err.Error()}
  57  		}
  58  		return checkRequiredNotSet(mi)
  59  	} else {
  60  		if err := newTextParser(s).unmarshalMessage(mi.ProtoReflect(), ""); err != nil {
  61  			return err
  62  		}
  63  		return checkRequiredNotSet(mi)
  64  	}
  65  }
  66  
  67  type textParser struct {
  68  	s            string // remaining input
  69  	done         bool   // whether the parsing is finished (success or error)
  70  	backed       bool   // whether back() was called
  71  	offset, line int
  72  	cur          token
  73  }
  74  
  75  type token struct {
  76  	value    string
  77  	err      *ParseError
  78  	line     int    // line number
  79  	offset   int    // byte number from start of input, not start of line
  80  	unquoted string // the unquoted version of value, if it was a quoted string
  81  }
  82  
  83  func newTextParser(s string) *textParser {
  84  	p := new(textParser)
  85  	p.s = s
  86  	p.line = 1
  87  	p.cur.line = 1
  88  	return p
  89  }
  90  
  91  func (p *textParser) unmarshalMessage(m protoreflect.Message, terminator string) (err error) {
  92  	md := m.Descriptor()
  93  	fds := md.Fields()
  94  
  95  	// A struct is a sequence of "name: value", terminated by one of
  96  	// '>' or '}', or the end of the input.  A name may also be
  97  	// "[extension]" or "[type/url]".
  98  	//
  99  	// The whole struct can also be an expanded Any message, like:
 100  	// [type/url] < ... struct contents ... >
 101  	seen := make(map[protoreflect.FieldNumber]bool)
 102  	for {
 103  		tok := p.next()
 104  		if tok.err != nil {
 105  			return tok.err
 106  		}
 107  		if tok.value == terminator {
 108  			break
 109  		}
 110  		if tok.value == "[" {
 111  			if err := p.unmarshalExtensionOrAny(m, seen); err != nil {
 112  				return err
 113  			}
 114  			continue
 115  		}
 116  
 117  		// This is a normal, non-extension field.
 118  		name := protoreflect.Name(tok.value)
 119  		fd := fds.ByName(name)
 120  		switch {
 121  		case fd == nil:
 122  			gd := fds.ByName(protoreflect.Name(strings.ToLower(string(name))))
 123  			if gd != nil && gd.Kind() == protoreflect.GroupKind && gd.Message().Name() == name {
 124  				fd = gd
 125  			}
 126  		case fd.Kind() == protoreflect.GroupKind && fd.Message().Name() != name:
 127  			fd = nil
 128  		case fd.IsWeak() && fd.Message().IsPlaceholder():
 129  			fd = nil
 130  		}
 131  		if fd == nil {
 132  			typeName := string(md.FullName())
 133  			if m, ok := m.Interface().(Message); ok {
 134  				t := reflect.TypeOf(m)
 135  				if t.Kind() == reflect.Ptr {
 136  					typeName = t.Elem().String()
 137  				}
 138  			}
 139  			return p.errorf("unknown field name %q in %v", name, typeName)
 140  		}
 141  		if od := fd.ContainingOneof(); od != nil && m.WhichOneof(od) != nil {
 142  			return p.errorf("field '%s' would overwrite already parsed oneof '%s'", name, od.Name())
 143  		}
 144  		if fd.Cardinality() != protoreflect.Repeated && seen[fd.Number()] {
 145  			return p.errorf("non-repeated field %q was repeated", fd.Name())
 146  		}
 147  		seen[fd.Number()] = true
 148  
 149  		// Consume any colon.
 150  		if err := p.checkForColon(fd); err != nil {
 151  			return err
 152  		}
 153  
 154  		// Parse into the field.
 155  		v := m.Get(fd)
 156  		if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
 157  			v = m.Mutable(fd)
 158  		}
 159  		if v, err = p.unmarshalValue(v, fd); err != nil {
 160  			return err
 161  		}
 162  		m.Set(fd, v)
 163  
 164  		if err := p.consumeOptionalSeparator(); err != nil {
 165  			return err
 166  		}
 167  	}
 168  	return nil
 169  }
 170  
 171  func (p *textParser) unmarshalExtensionOrAny(m protoreflect.Message, seen map[protoreflect.FieldNumber]bool) error {
 172  	name, err := p.consumeExtensionOrAnyName()
 173  	if err != nil {
 174  		return err
 175  	}
 176  
 177  	// If it contains a slash, it's an Any type URL.
 178  	if slashIdx := strings.LastIndex(name, "/"); slashIdx >= 0 {
 179  		tok := p.next()
 180  		if tok.err != nil {
 181  			return tok.err
 182  		}
 183  		// consume an optional colon
 184  		if tok.value == ":" {
 185  			tok = p.next()
 186  			if tok.err != nil {
 187  				return tok.err
 188  			}
 189  		}
 190  
 191  		var terminator string
 192  		switch tok.value {
 193  		case "<":
 194  			terminator = ">"
 195  		case "{":
 196  			terminator = "}"
 197  		default:
 198  			return p.errorf("expected '{' or '<', found %q", tok.value)
 199  		}
 200  
 201  		mt, err := protoregistry.GlobalTypes.FindMessageByURL(name)
 202  		if err != nil {
 203  			return p.errorf("unrecognized message %q in google.protobuf.Any", name[slashIdx+len("/"):])
 204  		}
 205  		m2 := mt.New()
 206  		if err := p.unmarshalMessage(m2, terminator); err != nil {
 207  			return err
 208  		}
 209  		b, err := protoV2.Marshal(m2.Interface())
 210  		if err != nil {
 211  			return p.errorf("failed to marshal message of type %q: %v", name[slashIdx+len("/"):], err)
 212  		}
 213  
 214  		urlFD := m.Descriptor().Fields().ByName("type_url")
 215  		valFD := m.Descriptor().Fields().ByName("value")
 216  		if seen[urlFD.Number()] {
 217  			return p.errorf("Any message unpacked multiple times, or %q already set", urlFD.Name())
 218  		}
 219  		if seen[valFD.Number()] {
 220  			return p.errorf("Any message unpacked multiple times, or %q already set", valFD.Name())
 221  		}
 222  		m.Set(urlFD, protoreflect.ValueOfString(name))
 223  		m.Set(valFD, protoreflect.ValueOfBytes(b))
 224  		seen[urlFD.Number()] = true
 225  		seen[valFD.Number()] = true
 226  		return nil
 227  	}
 228  
 229  	xname := protoreflect.FullName(name)
 230  	xt, _ := protoregistry.GlobalTypes.FindExtensionByName(xname)
 231  	if xt == nil && isMessageSet(m.Descriptor()) {
 232  		xt, _ = protoregistry.GlobalTypes.FindExtensionByName(xname.Append("message_set_extension"))
 233  	}
 234  	if xt == nil {
 235  		return p.errorf("unrecognized extension %q", name)
 236  	}
 237  	fd := xt.TypeDescriptor()
 238  	if fd.ContainingMessage().FullName() != m.Descriptor().FullName() {
 239  		return p.errorf("extension field %q does not extend message %q", name, m.Descriptor().FullName())
 240  	}
 241  
 242  	if err := p.checkForColon(fd); err != nil {
 243  		return err
 244  	}
 245  
 246  	v := m.Get(fd)
 247  	if !m.Has(fd) && (fd.IsList() || fd.IsMap() || fd.Message() != nil) {
 248  		v = m.Mutable(fd)
 249  	}
 250  	v, err = p.unmarshalValue(v, fd)
 251  	if err != nil {
 252  		return err
 253  	}
 254  	m.Set(fd, v)
 255  	return p.consumeOptionalSeparator()
 256  }
 257  
 258  func (p *textParser) unmarshalValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
 259  	tok := p.next()
 260  	if tok.err != nil {
 261  		return v, tok.err
 262  	}
 263  	if tok.value == "" {
 264  		return v, p.errorf("unexpected EOF")
 265  	}
 266  
 267  	switch {
 268  	case fd.IsList():
 269  		lv := v.List()
 270  		var err error
 271  		if tok.value == "[" {
 272  			// Repeated field with list notation, like [1,2,3].
 273  			for {
 274  				vv := lv.NewElement()
 275  				vv, err = p.unmarshalSingularValue(vv, fd)
 276  				if err != nil {
 277  					return v, err
 278  				}
 279  				lv.Append(vv)
 280  
 281  				tok := p.next()
 282  				if tok.err != nil {
 283  					return v, tok.err
 284  				}
 285  				if tok.value == "]" {
 286  					break
 287  				}
 288  				if tok.value != "," {
 289  					return v, p.errorf("Expected ']' or ',' found %q", tok.value)
 290  				}
 291  			}
 292  			return v, nil
 293  		}
 294  
 295  		// One value of the repeated field.
 296  		p.back()
 297  		vv := lv.NewElement()
 298  		vv, err = p.unmarshalSingularValue(vv, fd)
 299  		if err != nil {
 300  			return v, err
 301  		}
 302  		lv.Append(vv)
 303  		return v, nil
 304  	case fd.IsMap():
 305  		// The map entry should be this sequence of tokens:
 306  		//	< key : KEY value : VALUE >
 307  		// However, implementations may omit key or value, and technically
 308  		// we should support them in any order.
 309  		var terminator string
 310  		switch tok.value {
 311  		case "<":
 312  			terminator = ">"
 313  		case "{":
 314  			terminator = "}"
 315  		default:
 316  			return v, p.errorf("expected '{' or '<', found %q", tok.value)
 317  		}
 318  
 319  		keyFD := fd.MapKey()
 320  		valFD := fd.MapValue()
 321  
 322  		mv := v.Map()
 323  		kv := keyFD.Default()
 324  		vv := mv.NewValue()
 325  		for {
 326  			tok := p.next()
 327  			if tok.err != nil {
 328  				return v, tok.err
 329  			}
 330  			if tok.value == terminator {
 331  				break
 332  			}
 333  			var err error
 334  			switch tok.value {
 335  			case "key":
 336  				if err := p.consumeToken(":"); err != nil {
 337  					return v, err
 338  				}
 339  				if kv, err = p.unmarshalSingularValue(kv, keyFD); err != nil {
 340  					return v, err
 341  				}
 342  				if err := p.consumeOptionalSeparator(); err != nil {
 343  					return v, err
 344  				}
 345  			case "value":
 346  				if err := p.checkForColon(valFD); err != nil {
 347  					return v, err
 348  				}
 349  				if vv, err = p.unmarshalSingularValue(vv, valFD); err != nil {
 350  					return v, err
 351  				}
 352  				if err := p.consumeOptionalSeparator(); err != nil {
 353  					return v, err
 354  				}
 355  			default:
 356  				p.back()
 357  				return v, p.errorf(`expected "key", "value", or %q, found %q`, terminator, tok.value)
 358  			}
 359  		}
 360  		mv.Set(kv.MapKey(), vv)
 361  		return v, nil
 362  	default:
 363  		p.back()
 364  		return p.unmarshalSingularValue(v, fd)
 365  	}
 366  }
 367  
 368  func (p *textParser) unmarshalSingularValue(v protoreflect.Value, fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
 369  	tok := p.next()
 370  	if tok.err != nil {
 371  		return v, tok.err
 372  	}
 373  	if tok.value == "" {
 374  		return v, p.errorf("unexpected EOF")
 375  	}
 376  
 377  	switch fd.Kind() {
 378  	case protoreflect.BoolKind:
 379  		switch tok.value {
 380  		case "true", "1", "t", "True":
 381  			return protoreflect.ValueOfBool(true), nil
 382  		case "false", "0", "f", "False":
 383  			return protoreflect.ValueOfBool(false), nil
 384  		}
 385  	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
 386  		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
 387  			return protoreflect.ValueOfInt32(int32(x)), nil
 388  		}
 389  
 390  		// The C++ parser accepts large positive hex numbers that uses
 391  		// two's complement arithmetic to represent negative numbers.
 392  		// This feature is here for backwards compatibility with C++.
 393  		if strings.HasPrefix(tok.value, "0x") {
 394  			if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
 395  				return protoreflect.ValueOfInt32(int32(-(int64(^x) + 1))), nil
 396  			}
 397  		}
 398  	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
 399  		if x, err := strconv.ParseInt(tok.value, 0, 64); err == nil {
 400  			return protoreflect.ValueOfInt64(int64(x)), nil
 401  		}
 402  
 403  		// The C++ parser accepts large positive hex numbers that uses
 404  		// two's complement arithmetic to represent negative numbers.
 405  		// This feature is here for backwards compatibility with C++.
 406  		if strings.HasPrefix(tok.value, "0x") {
 407  			if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
 408  				return protoreflect.ValueOfInt64(int64(-(int64(^x) + 1))), nil
 409  			}
 410  		}
 411  	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
 412  		if x, err := strconv.ParseUint(tok.value, 0, 32); err == nil {
 413  			return protoreflect.ValueOfUint32(uint32(x)), nil
 414  		}
 415  	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
 416  		if x, err := strconv.ParseUint(tok.value, 0, 64); err == nil {
 417  			return protoreflect.ValueOfUint64(uint64(x)), nil
 418  		}
 419  	case protoreflect.FloatKind:
 420  		// Ignore 'f' for compatibility with output generated by C++,
 421  		// but don't remove 'f' when the value is "-inf" or "inf".
 422  		v := tok.value
 423  		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
 424  			v = v[:len(v)-len("f")]
 425  		}
 426  		if x, err := strconv.ParseFloat(v, 32); err == nil {
 427  			return protoreflect.ValueOfFloat32(float32(x)), nil
 428  		}
 429  	case protoreflect.DoubleKind:
 430  		// Ignore 'f' for compatibility with output generated by C++,
 431  		// but don't remove 'f' when the value is "-inf" or "inf".
 432  		v := tok.value
 433  		if strings.HasSuffix(v, "f") && v != "-inf" && v != "inf" {
 434  			v = v[:len(v)-len("f")]
 435  		}
 436  		if x, err := strconv.ParseFloat(v, 64); err == nil {
 437  			return protoreflect.ValueOfFloat64(float64(x)), nil
 438  		}
 439  	case protoreflect.StringKind:
 440  		if isQuote(tok.value[0]) {
 441  			return protoreflect.ValueOfString(tok.unquoted), nil
 442  		}
 443  	case protoreflect.BytesKind:
 444  		if isQuote(tok.value[0]) {
 445  			return protoreflect.ValueOfBytes([]byte(tok.unquoted)), nil
 446  		}
 447  	case protoreflect.EnumKind:
 448  		if x, err := strconv.ParseInt(tok.value, 0, 32); err == nil {
 449  			return protoreflect.ValueOfEnum(protoreflect.EnumNumber(x)), nil
 450  		}
 451  		vd := fd.Enum().Values().ByName(protoreflect.Name(tok.value))
 452  		if vd != nil {
 453  			return protoreflect.ValueOfEnum(vd.Number()), nil
 454  		}
 455  	case protoreflect.MessageKind, protoreflect.GroupKind:
 456  		var terminator string
 457  		switch tok.value {
 458  		case "{":
 459  			terminator = "}"
 460  		case "<":
 461  			terminator = ">"
 462  		default:
 463  			return v, p.errorf("expected '{' or '<', found %q", tok.value)
 464  		}
 465  		err := p.unmarshalMessage(v.Message(), terminator)
 466  		return v, err
 467  	default:
 468  		panic(fmt.Sprintf("invalid kind %v", fd.Kind()))
 469  	}
 470  	return v, p.errorf("invalid %v: %v", fd.Kind(), tok.value)
 471  }
 472  
 473  // Consume a ':' from the input stream (if the next token is a colon),
 474  // returning an error if a colon is needed but not present.
 475  func (p *textParser) checkForColon(fd protoreflect.FieldDescriptor) *ParseError {
 476  	tok := p.next()
 477  	if tok.err != nil {
 478  		return tok.err
 479  	}
 480  	if tok.value != ":" {
 481  		if fd.Message() == nil {
 482  			return p.errorf("expected ':', found %q", tok.value)
 483  		}
 484  		p.back()
 485  	}
 486  	return nil
 487  }
 488  
 489  // consumeExtensionOrAnyName consumes an extension name or an Any type URL and
 490  // the following ']'. It returns the name or URL consumed.
 491  func (p *textParser) consumeExtensionOrAnyName() (string, error) {
 492  	tok := p.next()
 493  	if tok.err != nil {
 494  		return "", tok.err
 495  	}
 496  
 497  	// If extension name or type url is quoted, it's a single token.
 498  	if len(tok.value) > 2 && isQuote(tok.value[0]) && tok.value[len(tok.value)-1] == tok.value[0] {
 499  		name, err := unquoteC(tok.value[1:len(tok.value)-1], rune(tok.value[0]))
 500  		if err != nil {
 501  			return "", err
 502  		}
 503  		return name, p.consumeToken("]")
 504  	}
 505  
 506  	// Consume everything up to "]"
 507  	var parts []string
 508  	for tok.value != "]" {
 509  		parts = append(parts, tok.value)
 510  		tok = p.next()
 511  		if tok.err != nil {
 512  			return "", p.errorf("unrecognized type_url or extension name: %s", tok.err)
 513  		}
 514  		if p.done && tok.value != "]" {
 515  			return "", p.errorf("unclosed type_url or extension name")
 516  		}
 517  	}
 518  	return strings.Join(parts, ""), nil
 519  }
 520  
 521  // consumeOptionalSeparator consumes an optional semicolon or comma.
 522  // It is used in unmarshalMessage to provide backward compatibility.
 523  func (p *textParser) consumeOptionalSeparator() error {
 524  	tok := p.next()
 525  	if tok.err != nil {
 526  		return tok.err
 527  	}
 528  	if tok.value != ";" && tok.value != "," {
 529  		p.back()
 530  	}
 531  	return nil
 532  }
 533  
 534  func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
 535  	pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
 536  	p.cur.err = pe
 537  	p.done = true
 538  	return pe
 539  }
 540  
 541  func (p *textParser) skipWhitespace() {
 542  	i := 0
 543  	for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
 544  		if p.s[i] == '#' {
 545  			// comment; skip to end of line or input
 546  			for i < len(p.s) && p.s[i] != '\n' {
 547  				i++
 548  			}
 549  			if i == len(p.s) {
 550  				break
 551  			}
 552  		}
 553  		if p.s[i] == '\n' {
 554  			p.line++
 555  		}
 556  		i++
 557  	}
 558  	p.offset += i
 559  	p.s = p.s[i:len(p.s)]
 560  	if len(p.s) == 0 {
 561  		p.done = true
 562  	}
 563  }
 564  
 565  func (p *textParser) advance() {
 566  	// Skip whitespace
 567  	p.skipWhitespace()
 568  	if p.done {
 569  		return
 570  	}
 571  
 572  	// Start of non-whitespace
 573  	p.cur.err = nil
 574  	p.cur.offset, p.cur.line = p.offset, p.line
 575  	p.cur.unquoted = ""
 576  	switch p.s[0] {
 577  	case '<', '>', '{', '}', ':', '[', ']', ';', ',', '/':
 578  		// Single symbol
 579  		p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
 580  	case '"', '\'':
 581  		// Quoted string
 582  		i := 1
 583  		for i < len(p.s) && p.s[i] != p.s[0] && p.s[i] != '\n' {
 584  			if p.s[i] == '\\' && i+1 < len(p.s) {
 585  				// skip escaped char
 586  				i++
 587  			}
 588  			i++
 589  		}
 590  		if i >= len(p.s) || p.s[i] != p.s[0] {
 591  			p.errorf("unmatched quote")
 592  			return
 593  		}
 594  		unq, err := unquoteC(p.s[1:i], rune(p.s[0]))
 595  		if err != nil {
 596  			p.errorf("invalid quoted string %s: %v", p.s[0:i+1], err)
 597  			return
 598  		}
 599  		p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
 600  		p.cur.unquoted = unq
 601  	default:
 602  		i := 0
 603  		for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
 604  			i++
 605  		}
 606  		if i == 0 {
 607  			p.errorf("unexpected byte %#x", p.s[0])
 608  			return
 609  		}
 610  		p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
 611  	}
 612  	p.offset += len(p.cur.value)
 613  }
 614  
 615  // Back off the parser by one token. Can only be done between calls to next().
 616  // It makes the next advance() a no-op.
 617  func (p *textParser) back() { p.backed = true }
 618  
 619  // Advances the parser and returns the new current token.
 620  func (p *textParser) next() *token {
 621  	if p.backed || p.done {
 622  		p.backed = false
 623  		return &p.cur
 624  	}
 625  	p.advance()
 626  	if p.done {
 627  		p.cur.value = ""
 628  	} else if len(p.cur.value) > 0 && isQuote(p.cur.value[0]) {
 629  		// Look for multiple quoted strings separated by whitespace,
 630  		// and concatenate them.
 631  		cat := p.cur
 632  		for {
 633  			p.skipWhitespace()
 634  			if p.done || !isQuote(p.s[0]) {
 635  				break
 636  			}
 637  			p.advance()
 638  			if p.cur.err != nil {
 639  				return &p.cur
 640  			}
 641  			cat.value += " " + p.cur.value
 642  			cat.unquoted += p.cur.unquoted
 643  		}
 644  		p.done = false // parser may have seen EOF, but we want to return cat
 645  		p.cur = cat
 646  	}
 647  	return &p.cur
 648  }
 649  
 650  func (p *textParser) consumeToken(s string) error {
 651  	tok := p.next()
 652  	if tok.err != nil {
 653  		return tok.err
 654  	}
 655  	if tok.value != s {
 656  		p.back()
 657  		return p.errorf("expected %q, found %q", s, tok.value)
 658  	}
 659  	return nil
 660  }
 661  
 662  var errBadUTF8 = errors.New("proto: bad UTF-8")
 663  
 664  func unquoteC(s string, quote rune) (string, error) {
 665  	// This is based on C++'s tokenizer.cc.
 666  	// Despite its name, this is *not* parsing C syntax.
 667  	// For instance, "\0" is an invalid quoted string.
 668  
 669  	// Avoid allocation in trivial cases.
 670  	simple := true
 671  	for _, r := range s {
 672  		if r == '\\' || r == quote {
 673  			simple = false
 674  			break
 675  		}
 676  	}
 677  	if simple {
 678  		return s, nil
 679  	}
 680  
 681  	buf := make([]byte, 0, 3*len(s)/2)
 682  	for len(s) > 0 {
 683  		r, n := utf8.DecodeRuneInString(s)
 684  		if r == utf8.RuneError && n == 1 {
 685  			return "", errBadUTF8
 686  		}
 687  		s = s[n:]
 688  		if r != '\\' {
 689  			if r < utf8.RuneSelf {
 690  				buf = append(buf, byte(r))
 691  			} else {
 692  				buf = append(buf, string(r)...)
 693  			}
 694  			continue
 695  		}
 696  
 697  		ch, tail, err := unescape(s)
 698  		if err != nil {
 699  			return "", err
 700  		}
 701  		buf = append(buf, ch...)
 702  		s = tail
 703  	}
 704  	return string(buf), nil
 705  }
 706  
 707  func unescape(s string) (ch string, tail string, err error) {
 708  	r, n := utf8.DecodeRuneInString(s)
 709  	if r == utf8.RuneError && n == 1 {
 710  		return "", "", errBadUTF8
 711  	}
 712  	s = s[n:]
 713  	switch r {
 714  	case 'a':
 715  		return "\a", s, nil
 716  	case 'b':
 717  		return "\b", s, nil
 718  	case 'f':
 719  		return "\f", s, nil
 720  	case 'n':
 721  		return "\n", s, nil
 722  	case 'r':
 723  		return "\r", s, nil
 724  	case 't':
 725  		return "\t", s, nil
 726  	case 'v':
 727  		return "\v", s, nil
 728  	case '?':
 729  		return "?", s, nil // trigraph workaround
 730  	case '\'', '"', '\\':
 731  		return string(r), s, nil
 732  	case '0', '1', '2', '3', '4', '5', '6', '7':
 733  		if len(s) < 2 {
 734  			return "", "", fmt.Errorf(`\%c requires 2 following digits`, r)
 735  		}
 736  		ss := string(r) + s[:2]
 737  		s = s[2:]
 738  		i, err := strconv.ParseUint(ss, 8, 8)
 739  		if err != nil {
 740  			return "", "", fmt.Errorf(`\%s contains non-octal digits`, ss)
 741  		}
 742  		return string([]byte{byte(i)}), s, nil
 743  	case 'x', 'X', 'u', 'U':
 744  		var n int
 745  		switch r {
 746  		case 'x', 'X':
 747  			n = 2
 748  		case 'u':
 749  			n = 4
 750  		case 'U':
 751  			n = 8
 752  		}
 753  		if len(s) < n {
 754  			return "", "", fmt.Errorf(`\%c requires %d following digits`, r, n)
 755  		}
 756  		ss := s[:n]
 757  		s = s[n:]
 758  		i, err := strconv.ParseUint(ss, 16, 64)
 759  		if err != nil {
 760  			return "", "", fmt.Errorf(`\%c%s contains non-hexadecimal digits`, r, ss)
 761  		}
 762  		if r == 'x' || r == 'X' {
 763  			return string([]byte{byte(i)}), s, nil
 764  		}
 765  		if i > utf8.MaxRune {
 766  			return "", "", fmt.Errorf(`\%c%s is not a valid Unicode code point`, r, ss)
 767  		}
 768  		return string(rune(i)), s, nil
 769  	}
 770  	return "", "", fmt.Errorf(`unknown escape \%c`, r)
 771  }
 772  
 773  func isIdentOrNumberChar(c byte) bool {
 774  	switch {
 775  	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
 776  		return true
 777  	case '0' <= c && c <= '9':
 778  		return true
 779  	}
 780  	switch c {
 781  	case '-', '+', '.', '_':
 782  		return true
 783  	}
 784  	return false
 785  }
 786  
 787  func isWhitespace(c byte) bool {
 788  	switch c {
 789  	case ' ', '\t', '\n', '\r':
 790  		return true
 791  	}
 792  	return false
 793  }
 794  
 795  func isQuote(c byte) bool {
 796  	switch c {
 797  	case '"', '\'':
 798  		return true
 799  	}
 800  	return false
 801  }
 802