render.go raw

   1  // Copyright 2011 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package html
   6  
   7  import (
   8  	"bufio"
   9  	"errors"
  10  	"fmt"
  11  	"io"
  12  	"strings"
  13  )
  14  
  15  type writer interface {
  16  	io.Writer
  17  	io.ByteWriter
  18  	WriteString(string) (int, error)
  19  }
  20  
  21  // Render renders the parse tree n to the given writer.
  22  //
  23  // Rendering is done on a 'best effort' basis: calling Parse on the output of
  24  // Render will always result in something similar to the original tree, but it
  25  // is not necessarily an exact clone unless the original tree was 'well-formed'.
  26  // 'Well-formed' is not easily specified; the HTML5 specification is
  27  // complicated.
  28  //
  29  // Calling Parse on arbitrary input typically results in a 'well-formed' parse
  30  // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
  31  // For example, in a 'well-formed' parse tree, no <a> element is a child of
  32  // another <a> element: parsing "<a><a>" results in two sibling elements.
  33  // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
  34  // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
  35  // children; the <a> is reparented to the <table>'s parent. However, calling
  36  // Parse on "<a><table><a>" does not return an error, but the result has an <a>
  37  // element with an <a> child, and is therefore not 'well-formed'.
  38  //
  39  // Programmatically constructed trees are typically also 'well-formed', but it
  40  // is possible to construct a tree that looks innocuous but, when rendered and
  41  // re-parsed, results in a different tree. A simple example is that a solitary
  42  // text node would become a tree containing <html>, <head> and <body> elements.
  43  // Another example is that the programmatic equivalent of "a<head>b</head>c"
  44  // becomes "<html><head><head/><body>abc</body></html>".
  45  func Render(w io.Writer, n *Node) error {
  46  	if x, ok := w.(writer); ok {
  47  		return render(x, n)
  48  	}
  49  	buf := bufio.NewWriter(w)
  50  	if err := render(buf, n); err != nil {
  51  		return err
  52  	}
  53  	return buf.Flush()
  54  }
  55  
  56  // plaintextAbort is returned from render1 when a <plaintext> element
  57  // has been rendered. No more end tags should be rendered after that.
  58  var plaintextAbort = errors.New("html: internal error (plaintext abort)")
  59  
  60  func render(w writer, n *Node) error {
  61  	err := render1(w, n)
  62  	if err == plaintextAbort {
  63  		err = nil
  64  	}
  65  	return err
  66  }
  67  
  68  func render1(w writer, n *Node) error {
  69  	// Render non-element nodes; these are the easy cases.
  70  	switch n.Type {
  71  	case ErrorNode:
  72  		return errors.New("html: cannot render an ErrorNode node")
  73  	case TextNode:
  74  		return escape(w, n.Data)
  75  	case DocumentNode:
  76  		for c := n.FirstChild; c != nil; c = c.NextSibling {
  77  			if err := render1(w, c); err != nil {
  78  				return err
  79  			}
  80  		}
  81  		return nil
  82  	case ElementNode:
  83  		// No-op.
  84  	case CommentNode:
  85  		if _, err := w.WriteString("<!--"); err != nil {
  86  			return err
  87  		}
  88  		if err := escapeComment(w, n.Data); err != nil {
  89  			return err
  90  		}
  91  		if _, err := w.WriteString("-->"); err != nil {
  92  			return err
  93  		}
  94  		return nil
  95  	case DoctypeNode:
  96  		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
  97  			return err
  98  		}
  99  		if err := escape(w, n.Data); err != nil {
 100  			return err
 101  		}
 102  		if n.Attr != nil {
 103  			var p, s string
 104  			for _, a := range n.Attr {
 105  				switch a.Key {
 106  				case "public":
 107  					p = a.Val
 108  				case "system":
 109  					s = a.Val
 110  				}
 111  			}
 112  			if p != "" {
 113  				if _, err := w.WriteString(" PUBLIC "); err != nil {
 114  					return err
 115  				}
 116  				if err := writeQuoted(w, p); err != nil {
 117  					return err
 118  				}
 119  				if s != "" {
 120  					if err := w.WriteByte(' '); err != nil {
 121  						return err
 122  					}
 123  					if err := writeQuoted(w, s); err != nil {
 124  						return err
 125  					}
 126  				}
 127  			} else if s != "" {
 128  				if _, err := w.WriteString(" SYSTEM "); err != nil {
 129  					return err
 130  				}
 131  				if err := writeQuoted(w, s); err != nil {
 132  					return err
 133  				}
 134  			}
 135  		}
 136  		return w.WriteByte('>')
 137  	case RawNode:
 138  		_, err := w.WriteString(n.Data)
 139  		return err
 140  	default:
 141  		return errors.New("html: unknown node type")
 142  	}
 143  
 144  	// Render the <xxx> opening tag.
 145  	if err := w.WriteByte('<'); err != nil {
 146  		return err
 147  	}
 148  	if _, err := w.WriteString(n.Data); err != nil {
 149  		return err
 150  	}
 151  	for _, a := range n.Attr {
 152  		if err := w.WriteByte(' '); err != nil {
 153  			return err
 154  		}
 155  		if a.Namespace != "" {
 156  			if _, err := w.WriteString(a.Namespace); err != nil {
 157  				return err
 158  			}
 159  			if err := w.WriteByte(':'); err != nil {
 160  				return err
 161  			}
 162  		}
 163  		if _, err := w.WriteString(a.Key); err != nil {
 164  			return err
 165  		}
 166  		if _, err := w.WriteString(`="`); err != nil {
 167  			return err
 168  		}
 169  		if err := escape(w, a.Val); err != nil {
 170  			return err
 171  		}
 172  		if err := w.WriteByte('"'); err != nil {
 173  			return err
 174  		}
 175  	}
 176  	if voidElements[n.Data] {
 177  		if n.FirstChild != nil {
 178  			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
 179  		}
 180  		_, err := w.WriteString("/>")
 181  		return err
 182  	}
 183  	if err := w.WriteByte('>'); err != nil {
 184  		return err
 185  	}
 186  
 187  	// Add initial newline where there is danger of a newline being ignored.
 188  	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
 189  		switch n.Data {
 190  		case "pre", "listing", "textarea":
 191  			if err := w.WriteByte('\n'); err != nil {
 192  				return err
 193  			}
 194  		}
 195  	}
 196  
 197  	// Render any child nodes
 198  	if childTextNodesAreLiteral(n) {
 199  		for c := n.FirstChild; c != nil; c = c.NextSibling {
 200  			if c.Type == TextNode {
 201  				if _, err := w.WriteString(c.Data); err != nil {
 202  					return err
 203  				}
 204  			} else {
 205  				if err := render1(w, c); err != nil {
 206  					return err
 207  				}
 208  			}
 209  		}
 210  		if n.Data == "plaintext" {
 211  			// Don't render anything else. <plaintext> must be the
 212  			// last element in the file, with no closing tag.
 213  			return plaintextAbort
 214  		}
 215  	} else {
 216  		for c := n.FirstChild; c != nil; c = c.NextSibling {
 217  			if err := render1(w, c); err != nil {
 218  				return err
 219  			}
 220  		}
 221  	}
 222  
 223  	// Render the </xxx> closing tag.
 224  	if _, err := w.WriteString("</"); err != nil {
 225  		return err
 226  	}
 227  	if _, err := w.WriteString(n.Data); err != nil {
 228  		return err
 229  	}
 230  	return w.WriteByte('>')
 231  }
 232  
 233  func childTextNodesAreLiteral(n *Node) bool {
 234  	// Per WHATWG HTML 13.3, if the parent of the current node is a style,
 235  	// script, xmp, iframe, noembed, noframes, or plaintext element, and the
 236  	// current node is a text node, append the value of the node's data
 237  	// literally. The specification is not explicit about it, but we only
 238  	// enforce this if we are in the HTML namespace (i.e. when the namespace is
 239  	// "").
 240  	// NOTE: we also always include noscript elements, although the
 241  	// specification states that they should only be rendered as such if
 242  	// scripting is enabled for the node (which is not something we track).
 243  	if n.Namespace != "" {
 244  		return false
 245  	}
 246  	switch n.Data {
 247  	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
 248  		return true
 249  	default:
 250  		return false
 251  	}
 252  }
 253  
 254  // writeQuoted writes s to w surrounded by quotes. Normally it will use double
 255  // quotes, but if s contains a double quote, it will use single quotes.
 256  // It is used for writing the identifiers in a doctype declaration.
 257  // In valid HTML, they can't contain both types of quotes.
 258  func writeQuoted(w writer, s string) error {
 259  	var q byte = '"'
 260  	if strings.Contains(s, `"`) {
 261  		q = '\''
 262  	}
 263  	if err := w.WriteByte(q); err != nil {
 264  		return err
 265  	}
 266  	if _, err := w.WriteString(s); err != nil {
 267  		return err
 268  	}
 269  	if err := w.WriteByte(q); err != nil {
 270  		return err
 271  	}
 272  	return nil
 273  }
 274  
 275  // Section 12.1.2, "Elements", gives this list of void elements. Void elements
 276  // are those that can't have any contents.
 277  var voidElements = map[string]bool{
 278  	"area":   true,
 279  	"base":   true,
 280  	"br":     true,
 281  	"col":    true,
 282  	"embed":  true,
 283  	"hr":     true,
 284  	"img":    true,
 285  	"input":  true,
 286  	"keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
 287  	"link":   true,
 288  	"meta":   true,
 289  	"param":  true,
 290  	"source": true,
 291  	"track":  true,
 292  	"wbr":    true,
 293  }
 294