decode_string.go raw

   1  // Copyright 2018 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package json
   6  
   7  import (
   8  	"strconv"
   9  	"unicode"
  10  	"unicode/utf16"
  11  	"unicode/utf8"
  12  
  13  	"google.golang.org/protobuf/internal/strs"
  14  )
  15  
  16  func (d *Decoder) parseString(in []byte) (string, int, error) {
  17  	in0 := in
  18  	if len(in) == 0 {
  19  		return "", 0, ErrUnexpectedEOF
  20  	}
  21  	if in[0] != '"' {
  22  		return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q at start of string", in[0])
  23  	}
  24  	in = in[1:]
  25  	i := indexNeedEscapeInBytes(in)
  26  	in, out := in[i:], in[:i:i] // set cap to prevent mutations
  27  	for len(in) > 0 {
  28  		switch r, n := utf8.DecodeRune(in); {
  29  		case r == utf8.RuneError && n == 1:
  30  			return "", 0, d.newSyntaxError(d.currPos(), "invalid UTF-8 in string")
  31  		case r < ' ':
  32  			return "", 0, d.newSyntaxError(d.currPos(), "invalid character %q in string", r)
  33  		case r == '"':
  34  			in = in[1:]
  35  			n := len(in0) - len(in)
  36  			return string(out), n, nil
  37  		case r == '\\':
  38  			if len(in) < 2 {
  39  				return "", 0, ErrUnexpectedEOF
  40  			}
  41  			switch r := in[1]; r {
  42  			case '"', '\\', '/':
  43  				in, out = in[2:], append(out, r)
  44  			case 'b':
  45  				in, out = in[2:], append(out, '\b')
  46  			case 'f':
  47  				in, out = in[2:], append(out, '\f')
  48  			case 'n':
  49  				in, out = in[2:], append(out, '\n')
  50  			case 'r':
  51  				in, out = in[2:], append(out, '\r')
  52  			case 't':
  53  				in, out = in[2:], append(out, '\t')
  54  			case 'u':
  55  				if len(in) < 6 {
  56  					return "", 0, ErrUnexpectedEOF
  57  				}
  58  				v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  59  				if err != nil {
  60  					return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
  61  				}
  62  				in = in[6:]
  63  
  64  				r := rune(v)
  65  				if utf16.IsSurrogate(r) {
  66  					if len(in) < 6 {
  67  						return "", 0, ErrUnexpectedEOF
  68  					}
  69  					v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
  70  					r = utf16.DecodeRune(r, rune(v))
  71  					if in[0] != '\\' || in[1] != 'u' ||
  72  						r == unicode.ReplacementChar || err != nil {
  73  						return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:6])
  74  					}
  75  					in = in[6:]
  76  				}
  77  				out = append(out, string(r)...)
  78  			default:
  79  				return "", 0, d.newSyntaxError(d.currPos(), "invalid escape code %q in string", in[:2])
  80  			}
  81  		default:
  82  			i := indexNeedEscapeInBytes(in[n:])
  83  			in, out = in[n+i:], append(out, in[:n+i]...)
  84  		}
  85  	}
  86  	return "", 0, ErrUnexpectedEOF
  87  }
  88  
  89  // indexNeedEscapeInBytes returns the index of the character that needs
  90  // escaping. If no characters need escaping, this returns the input length.
  91  func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
  92