escape.go raw

   1  // Copyright 2016 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Copied and modified from Go 1.8 stdlib's encoding/json/#safeSet
   6  
   7  package json
   8  
   9  import (
  10  	"bytes"
  11  	"unicode/utf8"
  12  )
  13  
  14  // safeSet holds the value true if the ASCII character with the given array
  15  // position can be represented inside a JSON string without any further
  16  // escaping.
  17  //
  18  // All values are true except for the ASCII control characters (0-31), the
  19  // double quote ("), and the backslash character ("\").
  20  var safeSet = [utf8.RuneSelf]bool{
  21  	' ':      true,
  22  	'!':      true,
  23  	'"':      false,
  24  	'#':      true,
  25  	'$':      true,
  26  	'%':      true,
  27  	'&':      true,
  28  	'\'':     true,
  29  	'(':      true,
  30  	')':      true,
  31  	'*':      true,
  32  	'+':      true,
  33  	',':      true,
  34  	'-':      true,
  35  	'.':      true,
  36  	'/':      true,
  37  	'0':      true,
  38  	'1':      true,
  39  	'2':      true,
  40  	'3':      true,
  41  	'4':      true,
  42  	'5':      true,
  43  	'6':      true,
  44  	'7':      true,
  45  	'8':      true,
  46  	'9':      true,
  47  	':':      true,
  48  	';':      true,
  49  	'<':      true,
  50  	'=':      true,
  51  	'>':      true,
  52  	'?':      true,
  53  	'@':      true,
  54  	'A':      true,
  55  	'B':      true,
  56  	'C':      true,
  57  	'D':      true,
  58  	'E':      true,
  59  	'F':      true,
  60  	'G':      true,
  61  	'H':      true,
  62  	'I':      true,
  63  	'J':      true,
  64  	'K':      true,
  65  	'L':      true,
  66  	'M':      true,
  67  	'N':      true,
  68  	'O':      true,
  69  	'P':      true,
  70  	'Q':      true,
  71  	'R':      true,
  72  	'S':      true,
  73  	'T':      true,
  74  	'U':      true,
  75  	'V':      true,
  76  	'W':      true,
  77  	'X':      true,
  78  	'Y':      true,
  79  	'Z':      true,
  80  	'[':      true,
  81  	'\\':     false,
  82  	']':      true,
  83  	'^':      true,
  84  	'_':      true,
  85  	'`':      true,
  86  	'a':      true,
  87  	'b':      true,
  88  	'c':      true,
  89  	'd':      true,
  90  	'e':      true,
  91  	'f':      true,
  92  	'g':      true,
  93  	'h':      true,
  94  	'i':      true,
  95  	'j':      true,
  96  	'k':      true,
  97  	'l':      true,
  98  	'm':      true,
  99  	'n':      true,
 100  	'o':      true,
 101  	'p':      true,
 102  	'q':      true,
 103  	'r':      true,
 104  	's':      true,
 105  	't':      true,
 106  	'u':      true,
 107  	'v':      true,
 108  	'w':      true,
 109  	'x':      true,
 110  	'y':      true,
 111  	'z':      true,
 112  	'{':      true,
 113  	'|':      true,
 114  	'}':      true,
 115  	'~':      true,
 116  	'\u007f': true,
 117  }
 118  
 119  // copied from Go 1.8 stdlib's encoding/json/#hex
 120  var hex = "0123456789abcdef"
 121  
 122  // escapeStringBytes escapes and writes the passed in string bytes to the dst
 123  // buffer
 124  //
 125  // Copied and modifed from Go 1.8 stdlib's encodeing/json/#encodeState.stringBytes
 126  func escapeStringBytes(e *bytes.Buffer, s []byte) {
 127  	e.WriteByte('"')
 128  	start := 0
 129  	for i := 0; i < len(s); {
 130  		if b := s[i]; b < utf8.RuneSelf {
 131  			if safeSet[b] {
 132  				i++
 133  				continue
 134  			}
 135  			if start < i {
 136  				e.Write(s[start:i])
 137  			}
 138  			switch b {
 139  			case '\\', '"':
 140  				e.WriteByte('\\')
 141  				e.WriteByte(b)
 142  			case '\n':
 143  				e.WriteByte('\\')
 144  				e.WriteByte('n')
 145  			case '\r':
 146  				e.WriteByte('\\')
 147  				e.WriteByte('r')
 148  			case '\t':
 149  				e.WriteByte('\\')
 150  				e.WriteByte('t')
 151  			default:
 152  				// This encodes bytes < 0x20 except for \t, \n and \r.
 153  				// If escapeHTML is set, it also escapes <, >, and &
 154  				// because they can lead to security holes when
 155  				// user-controlled strings are rendered into JSON
 156  				// and served to some browsers.
 157  				e.WriteString(`\u00`)
 158  				e.WriteByte(hex[b>>4])
 159  				e.WriteByte(hex[b&0xF])
 160  			}
 161  			i++
 162  			start = i
 163  			continue
 164  		}
 165  		c, size := utf8.DecodeRune(s[i:])
 166  		if c == utf8.RuneError && size == 1 {
 167  			if start < i {
 168  				e.Write(s[start:i])
 169  			}
 170  			e.WriteString(`\ufffd`)
 171  			i += size
 172  			start = i
 173  			continue
 174  		}
 175  		// U+2028 is LINE SEPARATOR.
 176  		// U+2029 is PARAGRAPH SEPARATOR.
 177  		// They are both technically valid characters in JSON strings,
 178  		// but don't work in JSONP, which has to be evaluated as JavaScript,
 179  		// and can lead to security holes there. It is valid JSON to
 180  		// escape them, so we do so unconditionally.
 181  		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
 182  		if c == '\u2028' || c == '\u2029' {
 183  			if start < i {
 184  				e.Write(s[start:i])
 185  			}
 186  			e.WriteString(`\u202`)
 187  			e.WriteByte(hex[c&0xF])
 188  			i += size
 189  			start = i
 190  			continue
 191  		}
 192  		i += size
 193  	}
 194  	if start < len(s) {
 195  		e.Write(s[start:])
 196  	}
 197  	e.WriteByte('"')
 198  }
 199