stream_str.go raw

   1  package jsoniter
   2  
   3  import (
   4  	"unicode/utf8"
   5  )
   6  
   7  // htmlSafeSet holds the value true if the ASCII character with the given
   8  // array position can be safely represented inside a JSON string, embedded
   9  // inside of HTML <script> tags, without any additional escaping.
  10  //
  11  // All values are true except for the ASCII control characters (0-31), the
  12  // double quote ("), the backslash character ("\"), HTML opening and closing
  13  // tags ("<" and ">"), and the ampersand ("&").
  14  var htmlSafeSet = [utf8.RuneSelf]bool{
  15  	' ':      true,
  16  	'!':      true,
  17  	'"':      false,
  18  	'#':      true,
  19  	'$':      true,
  20  	'%':      true,
  21  	'&':      false,
  22  	'\'':     true,
  23  	'(':      true,
  24  	')':      true,
  25  	'*':      true,
  26  	'+':      true,
  27  	',':      true,
  28  	'-':      true,
  29  	'.':      true,
  30  	'/':      true,
  31  	'0':      true,
  32  	'1':      true,
  33  	'2':      true,
  34  	'3':      true,
  35  	'4':      true,
  36  	'5':      true,
  37  	'6':      true,
  38  	'7':      true,
  39  	'8':      true,
  40  	'9':      true,
  41  	':':      true,
  42  	';':      true,
  43  	'<':      false,
  44  	'=':      true,
  45  	'>':      false,
  46  	'?':      true,
  47  	'@':      true,
  48  	'A':      true,
  49  	'B':      true,
  50  	'C':      true,
  51  	'D':      true,
  52  	'E':      true,
  53  	'F':      true,
  54  	'G':      true,
  55  	'H':      true,
  56  	'I':      true,
  57  	'J':      true,
  58  	'K':      true,
  59  	'L':      true,
  60  	'M':      true,
  61  	'N':      true,
  62  	'O':      true,
  63  	'P':      true,
  64  	'Q':      true,
  65  	'R':      true,
  66  	'S':      true,
  67  	'T':      true,
  68  	'U':      true,
  69  	'V':      true,
  70  	'W':      true,
  71  	'X':      true,
  72  	'Y':      true,
  73  	'Z':      true,
  74  	'[':      true,
  75  	'\\':     false,
  76  	']':      true,
  77  	'^':      true,
  78  	'_':      true,
  79  	'`':      true,
  80  	'a':      true,
  81  	'b':      true,
  82  	'c':      true,
  83  	'd':      true,
  84  	'e':      true,
  85  	'f':      true,
  86  	'g':      true,
  87  	'h':      true,
  88  	'i':      true,
  89  	'j':      true,
  90  	'k':      true,
  91  	'l':      true,
  92  	'm':      true,
  93  	'n':      true,
  94  	'o':      true,
  95  	'p':      true,
  96  	'q':      true,
  97  	'r':      true,
  98  	's':      true,
  99  	't':      true,
 100  	'u':      true,
 101  	'v':      true,
 102  	'w':      true,
 103  	'x':      true,
 104  	'y':      true,
 105  	'z':      true,
 106  	'{':      true,
 107  	'|':      true,
 108  	'}':      true,
 109  	'~':      true,
 110  	'\u007f': true,
 111  }
 112  
 113  // safeSet holds the value true if the ASCII character with the given array
 114  // position can be represented inside a JSON string without any further
 115  // escaping.
 116  //
 117  // All values are true except for the ASCII control characters (0-31), the
 118  // double quote ("), and the backslash character ("\").
 119  var safeSet = [utf8.RuneSelf]bool{
 120  	' ':      true,
 121  	'!':      true,
 122  	'"':      false,
 123  	'#':      true,
 124  	'$':      true,
 125  	'%':      true,
 126  	'&':      true,
 127  	'\'':     true,
 128  	'(':      true,
 129  	')':      true,
 130  	'*':      true,
 131  	'+':      true,
 132  	',':      true,
 133  	'-':      true,
 134  	'.':      true,
 135  	'/':      true,
 136  	'0':      true,
 137  	'1':      true,
 138  	'2':      true,
 139  	'3':      true,
 140  	'4':      true,
 141  	'5':      true,
 142  	'6':      true,
 143  	'7':      true,
 144  	'8':      true,
 145  	'9':      true,
 146  	':':      true,
 147  	';':      true,
 148  	'<':      true,
 149  	'=':      true,
 150  	'>':      true,
 151  	'?':      true,
 152  	'@':      true,
 153  	'A':      true,
 154  	'B':      true,
 155  	'C':      true,
 156  	'D':      true,
 157  	'E':      true,
 158  	'F':      true,
 159  	'G':      true,
 160  	'H':      true,
 161  	'I':      true,
 162  	'J':      true,
 163  	'K':      true,
 164  	'L':      true,
 165  	'M':      true,
 166  	'N':      true,
 167  	'O':      true,
 168  	'P':      true,
 169  	'Q':      true,
 170  	'R':      true,
 171  	'S':      true,
 172  	'T':      true,
 173  	'U':      true,
 174  	'V':      true,
 175  	'W':      true,
 176  	'X':      true,
 177  	'Y':      true,
 178  	'Z':      true,
 179  	'[':      true,
 180  	'\\':     false,
 181  	']':      true,
 182  	'^':      true,
 183  	'_':      true,
 184  	'`':      true,
 185  	'a':      true,
 186  	'b':      true,
 187  	'c':      true,
 188  	'd':      true,
 189  	'e':      true,
 190  	'f':      true,
 191  	'g':      true,
 192  	'h':      true,
 193  	'i':      true,
 194  	'j':      true,
 195  	'k':      true,
 196  	'l':      true,
 197  	'm':      true,
 198  	'n':      true,
 199  	'o':      true,
 200  	'p':      true,
 201  	'q':      true,
 202  	'r':      true,
 203  	's':      true,
 204  	't':      true,
 205  	'u':      true,
 206  	'v':      true,
 207  	'w':      true,
 208  	'x':      true,
 209  	'y':      true,
 210  	'z':      true,
 211  	'{':      true,
 212  	'|':      true,
 213  	'}':      true,
 214  	'~':      true,
 215  	'\u007f': true,
 216  }
 217  
 218  var hex = "0123456789abcdef"
 219  
 220  // WriteStringWithHTMLEscaped write string to stream with html special characters escaped
 221  func (stream *Stream) WriteStringWithHTMLEscaped(s string) {
 222  	valLen := len(s)
 223  	stream.buf = append(stream.buf, '"')
 224  	// write string, the fast path, without utf8 and escape support
 225  	i := 0
 226  	for ; i < valLen; i++ {
 227  		c := s[i]
 228  		if c < utf8.RuneSelf && htmlSafeSet[c] {
 229  			stream.buf = append(stream.buf, c)
 230  		} else {
 231  			break
 232  		}
 233  	}
 234  	if i == valLen {
 235  		stream.buf = append(stream.buf, '"')
 236  		return
 237  	}
 238  	writeStringSlowPathWithHTMLEscaped(stream, i, s, valLen)
 239  }
 240  
 241  func writeStringSlowPathWithHTMLEscaped(stream *Stream, i int, s string, valLen int) {
 242  	start := i
 243  	// for the remaining parts, we process them char by char
 244  	for i < valLen {
 245  		if b := s[i]; b < utf8.RuneSelf {
 246  			if htmlSafeSet[b] {
 247  				i++
 248  				continue
 249  			}
 250  			if start < i {
 251  				stream.WriteRaw(s[start:i])
 252  			}
 253  			switch b {
 254  			case '\\', '"':
 255  				stream.writeTwoBytes('\\', b)
 256  			case '\n':
 257  				stream.writeTwoBytes('\\', 'n')
 258  			case '\r':
 259  				stream.writeTwoBytes('\\', 'r')
 260  			case '\t':
 261  				stream.writeTwoBytes('\\', 't')
 262  			default:
 263  				// This encodes bytes < 0x20 except for \t, \n and \r.
 264  				// If escapeHTML is set, it also escapes <, >, and &
 265  				// because they can lead to security holes when
 266  				// user-controlled strings are rendered into JSON
 267  				// and served to some browsers.
 268  				stream.WriteRaw(`\u00`)
 269  				stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
 270  			}
 271  			i++
 272  			start = i
 273  			continue
 274  		}
 275  		c, size := utf8.DecodeRuneInString(s[i:])
 276  		if c == utf8.RuneError && size == 1 {
 277  			if start < i {
 278  				stream.WriteRaw(s[start:i])
 279  			}
 280  			stream.WriteRaw(`\ufffd`)
 281  			i++
 282  			start = i
 283  			continue
 284  		}
 285  		// U+2028 is LINE SEPARATOR.
 286  		// U+2029 is PARAGRAPH SEPARATOR.
 287  		// They are both technically valid characters in JSON strings,
 288  		// but don't work in JSONP, which has to be evaluated as JavaScript,
 289  		// and can lead to security holes there. It is valid JSON to
 290  		// escape them, so we do so unconditionally.
 291  		// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
 292  		if c == '\u2028' || c == '\u2029' {
 293  			if start < i {
 294  				stream.WriteRaw(s[start:i])
 295  			}
 296  			stream.WriteRaw(`\u202`)
 297  			stream.writeByte(hex[c&0xF])
 298  			i += size
 299  			start = i
 300  			continue
 301  		}
 302  		i += size
 303  	}
 304  	if start < len(s) {
 305  		stream.WriteRaw(s[start:])
 306  	}
 307  	stream.writeByte('"')
 308  }
 309  
 310  // WriteString write string to stream without html escape
 311  func (stream *Stream) WriteString(s string) {
 312  	valLen := len(s)
 313  	stream.buf = append(stream.buf, '"')
 314  	// write string, the fast path, without utf8 and escape support
 315  	i := 0
 316  	for ; i < valLen; i++ {
 317  		c := s[i]
 318  		if c > 31 && c != '"' && c != '\\' {
 319  			stream.buf = append(stream.buf, c)
 320  		} else {
 321  			break
 322  		}
 323  	}
 324  	if i == valLen {
 325  		stream.buf = append(stream.buf, '"')
 326  		return
 327  	}
 328  	writeStringSlowPath(stream, i, s, valLen)
 329  }
 330  
 331  func writeStringSlowPath(stream *Stream, i int, s string, valLen int) {
 332  	start := i
 333  	// for the remaining parts, we process them char by char
 334  	for i < valLen {
 335  		if b := s[i]; b < utf8.RuneSelf {
 336  			if safeSet[b] {
 337  				i++
 338  				continue
 339  			}
 340  			if start < i {
 341  				stream.WriteRaw(s[start:i])
 342  			}
 343  			switch b {
 344  			case '\\', '"':
 345  				stream.writeTwoBytes('\\', b)
 346  			case '\n':
 347  				stream.writeTwoBytes('\\', 'n')
 348  			case '\r':
 349  				stream.writeTwoBytes('\\', 'r')
 350  			case '\t':
 351  				stream.writeTwoBytes('\\', 't')
 352  			default:
 353  				// This encodes bytes < 0x20 except for \t, \n and \r.
 354  				// If escapeHTML is set, it also escapes <, >, and &
 355  				// because they can lead to security holes when
 356  				// user-controlled strings are rendered into JSON
 357  				// and served to some browsers.
 358  				stream.WriteRaw(`\u00`)
 359  				stream.writeTwoBytes(hex[b>>4], hex[b&0xF])
 360  			}
 361  			i++
 362  			start = i
 363  			continue
 364  		}
 365  		i++
 366  		continue
 367  	}
 368  	if start < len(s) {
 369  		stream.WriteRaw(s[start:])
 370  	}
 371  	stream.writeByte('"')
 372  }
 373