escape.go raw

   1  // Copyright 2009 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Copied and modified from Go 1.14 stdlib's encoding/xml
   6  
   7  package xml
   8  
   9  import (
  10  	"unicode/utf8"
  11  )
  12  
  13  // Copied from Go 1.14 stdlib's encoding/xml
  14  var (
  15  	escQuot = []byte(""") // shorter than """
  16  	escApos = []byte("'") // shorter than "'"
  17  	escAmp  = []byte("&")
  18  	escLT   = []byte("<")
  19  	escGT   = []byte(">")
  20  	escTab  = []byte("	")
  21  	escNL   = []byte("
")
  22  	escCR   = []byte("
")
  23  	escFFFD = []byte("\uFFFD") // Unicode replacement character
  24  
  25  	// Additional Escapes
  26  	escNextLine = []byte("…")
  27  	escLS       = []byte("
")
  28  )
  29  
  30  // Decide whether the given rune is in the XML Character Range, per
  31  // the Char production of https://www.xml.com/axml/testaxml.htm,
  32  // Section 2.2 Characters.
  33  func isInCharacterRange(r rune) (inrange bool) {
  34  	return r == 0x09 ||
  35  		r == 0x0A ||
  36  		r == 0x0D ||
  37  		r >= 0x20 && r <= 0xD7FF ||
  38  		r >= 0xE000 && r <= 0xFFFD ||
  39  		r >= 0x10000 && r <= 0x10FFFF
  40  }
  41  
  42  // TODO: When do we need to escape the string?
  43  // Based on encoding/xml escapeString from the Go Standard Library.
  44  // https://golang.org/src/encoding/xml/xml.go
  45  func escapeString(e writer, s string) {
  46  	var esc []byte
  47  	last := 0
  48  	for i := 0; i < len(s); {
  49  		r, width := utf8.DecodeRuneInString(s[i:])
  50  		i += width
  51  		switch r {
  52  		case '"':
  53  			esc = escQuot
  54  		case '\'':
  55  			esc = escApos
  56  		case '&':
  57  			esc = escAmp
  58  		case '<':
  59  			esc = escLT
  60  		case '>':
  61  			esc = escGT
  62  		case '\t':
  63  			esc = escTab
  64  		case '\n':
  65  			esc = escNL
  66  		case '\r':
  67  			esc = escCR
  68  		case '\u0085':
  69  			// Not escaped by stdlib
  70  			esc = escNextLine
  71  		case '\u2028':
  72  			// Not escaped by stdlib
  73  			esc = escLS
  74  		default:
  75  			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
  76  				esc = escFFFD
  77  				break
  78  			}
  79  			continue
  80  		}
  81  		e.WriteString(s[last : i-width])
  82  		e.Write(esc)
  83  		last = i
  84  	}
  85  	e.WriteString(s[last:])
  86  }
  87  
  88  // escapeText writes to w the properly escaped XML equivalent
  89  // of the plain text data s. If escapeNewline is true, newline
  90  // characters will be escaped.
  91  //
  92  // Based on encoding/xml escapeText from the Go Standard Library.
  93  // https://golang.org/src/encoding/xml/xml.go
  94  func escapeText(e writer, s []byte) {
  95  	var esc []byte
  96  	last := 0
  97  	for i := 0; i < len(s); {
  98  		r, width := utf8.DecodeRune(s[i:])
  99  		i += width
 100  		switch r {
 101  		case '"':
 102  			esc = escQuot
 103  		case '\'':
 104  			esc = escApos
 105  		case '&':
 106  			esc = escAmp
 107  		case '<':
 108  			esc = escLT
 109  		case '>':
 110  			esc = escGT
 111  		case '\t':
 112  			esc = escTab
 113  		case '\n':
 114  			// This always escapes newline, which is different than stdlib's optional
 115  			// escape of new line.
 116  			esc = escNL
 117  		case '\r':
 118  			esc = escCR
 119  		case '\u0085':
 120  			// Not escaped by stdlib
 121  			esc = escNextLine
 122  		case '\u2028':
 123  			// Not escaped by stdlib
 124  			esc = escLS
 125  		default:
 126  			if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
 127  				esc = escFFFD
 128  				break
 129  			}
 130  			continue
 131  		}
 132  		e.Write(s[last : i-width])
 133  		e.Write(esc)
 134  		last = i
 135  	}
 136  	e.Write(s[last:])
 137  }
 138