parse.go raw

   1  // Copyright 2010 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  package html
   6  
   7  import (
   8  	"errors"
   9  	"fmt"
  10  	"io"
  11  	"strings"
  12  
  13  	a "golang.org/x/net/html/atom"
  14  )
  15  
  16  // A parser implements the HTML5 parsing algorithm:
  17  // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
  18  type parser struct {
  19  	// tokenizer provides the tokens for the parser.
  20  	tokenizer *Tokenizer
  21  	// tok is the most recently read token.
  22  	tok Token
  23  	// Self-closing tags like <hr/> are treated as start tags, except that
  24  	// hasSelfClosingToken is set while they are being processed.
  25  	hasSelfClosingToken bool
  26  	// doc is the document root element.
  27  	doc *Node
  28  	// The stack of open elements (section 12.2.4.2) and active formatting
  29  	// elements (section 12.2.4.3).
  30  	oe, afe nodeStack
  31  	// Element pointers (section 12.2.4.4).
  32  	head, form *Node
  33  	// Other parsing state flags (section 12.2.4.5).
  34  	scripting, framesetOK bool
  35  	// The stack of template insertion modes
  36  	templateStack insertionModeStack
  37  	// im is the current insertion mode.
  38  	im insertionMode
  39  	// originalIM is the insertion mode to go back to after completing a text
  40  	// or inTableText insertion mode.
  41  	originalIM insertionMode
  42  	// fosterParenting is whether new elements should be inserted according to
  43  	// the foster parenting rules (section 12.2.6.1).
  44  	fosterParenting bool
  45  	// quirks is whether the parser is operating in "quirks mode."
  46  	quirks bool
  47  	// fragment is whether the parser is parsing an HTML fragment.
  48  	fragment bool
  49  	// context is the context element when parsing an HTML fragment
  50  	// (section 12.4).
  51  	context *Node
  52  }
  53  
  54  func (p *parser) top() *Node {
  55  	if n := p.oe.top(); n != nil {
  56  		return n
  57  	}
  58  	return p.doc
  59  }
  60  
  61  // Stop tags for use in popUntil. These come from section 12.2.4.2.
  62  var (
  63  	defaultScopeStopTags = map[string][]a.Atom{
  64  		"":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
  65  		"math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
  66  		"svg":  {a.Desc, a.ForeignObject, a.Title},
  67  	}
  68  )
  69  
  70  type scope int
  71  
  72  const (
  73  	defaultScope scope = iota
  74  	listItemScope
  75  	buttonScope
  76  	tableScope
  77  	tableRowScope
  78  	tableBodyScope
  79  	selectScope
  80  )
  81  
  82  // popUntil pops the stack of open elements at the highest element whose tag
  83  // is in matchTags, provided there is no higher element in the scope's stop
  84  // tags (as defined in section 12.2.4.2). It returns whether or not there was
  85  // such an element. If there was not, popUntil leaves the stack unchanged.
  86  //
  87  // For example, the set of stop tags for table scope is: "html", "table". If
  88  // the stack was:
  89  // ["html", "body", "font", "table", "b", "i", "u"]
  90  // then popUntil(tableScope, "font") would return false, but
  91  // popUntil(tableScope, "i") would return true and the stack would become:
  92  // ["html", "body", "font", "table", "b"]
  93  //
  94  // If an element's tag is in both the stop tags and matchTags, then the stack
  95  // will be popped and the function returns true (provided, of course, there was
  96  // no higher element in the stack that was also in the stop tags). For example,
  97  // popUntil(tableScope, "table") returns true and leaves:
  98  // ["html", "body", "font"]
  99  func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
 100  	if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
 101  		p.oe = p.oe[:i]
 102  		return true
 103  	}
 104  	return false
 105  }
 106  
 107  // indexOfElementInScope returns the index in p.oe of the highest element whose
 108  // tag is in matchTags that is in scope. If no matching element is in scope, it
 109  // returns -1.
 110  func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
 111  	for i := len(p.oe) - 1; i >= 0; i-- {
 112  		tagAtom := p.oe[i].DataAtom
 113  		if p.oe[i].Namespace == "" {
 114  			for _, t := range matchTags {
 115  				if t == tagAtom {
 116  					return i
 117  				}
 118  			}
 119  			switch s {
 120  			case defaultScope:
 121  				// No-op.
 122  			case listItemScope:
 123  				if tagAtom == a.Ol || tagAtom == a.Ul {
 124  					return -1
 125  				}
 126  			case buttonScope:
 127  				if tagAtom == a.Button {
 128  					return -1
 129  				}
 130  			case tableScope:
 131  				if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 132  					return -1
 133  				}
 134  			case selectScope:
 135  				if tagAtom != a.Optgroup && tagAtom != a.Option {
 136  					return -1
 137  				}
 138  			default:
 139  				panic(fmt.Sprintf("html: internal error: indexOfElementInScope unknown scope: %d", s))
 140  			}
 141  		}
 142  		switch s {
 143  		case defaultScope, listItemScope, buttonScope:
 144  			for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
 145  				if t == tagAtom {
 146  					return -1
 147  				}
 148  			}
 149  		}
 150  	}
 151  	return -1
 152  }
 153  
 154  // elementInScope is like popUntil, except that it doesn't modify the stack of
 155  // open elements.
 156  func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
 157  	return p.indexOfElementInScope(s, matchTags...) != -1
 158  }
 159  
 160  // clearStackToContext pops elements off the stack of open elements until a
 161  // scope-defined element is found.
 162  func (p *parser) clearStackToContext(s scope) {
 163  	for i := len(p.oe) - 1; i >= 0; i-- {
 164  		tagAtom := p.oe[i].DataAtom
 165  		switch s {
 166  		case tableScope:
 167  			if tagAtom == a.Html || tagAtom == a.Table || tagAtom == a.Template {
 168  				p.oe = p.oe[:i+1]
 169  				return
 170  			}
 171  		case tableRowScope:
 172  			if tagAtom == a.Html || tagAtom == a.Tr || tagAtom == a.Template {
 173  				p.oe = p.oe[:i+1]
 174  				return
 175  			}
 176  		case tableBodyScope:
 177  			if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead || tagAtom == a.Template {
 178  				p.oe = p.oe[:i+1]
 179  				return
 180  			}
 181  		default:
 182  			panic(fmt.Sprintf("html: internal error: clearStackToContext unknown scope: %d", s))
 183  		}
 184  	}
 185  }
 186  
 187  // parseGenericRawTextElement implements the generic raw text element parsing
 188  // algorithm defined in 12.2.6.2.
 189  // https://html.spec.whatwg.org/multipage/parsing.html#parsing-elements-that-contain-only-text
 190  // TODO: Since both RAWTEXT and RCDATA states are treated as tokenizer's part
 191  // officially, need to make tokenizer consider both states.
 192  func (p *parser) parseGenericRawTextElement() {
 193  	p.addElement()
 194  	p.originalIM = p.im
 195  	p.im = textIM
 196  }
 197  
 198  // generateImpliedEndTags pops nodes off the stack of open elements as long as
 199  // the top node has a tag name of dd, dt, li, optgroup, option, p, rb, rp, rt or rtc.
 200  // If exceptions are specified, nodes with that name will not be popped off.
 201  func (p *parser) generateImpliedEndTags(exceptions ...string) {
 202  	var i int
 203  loop:
 204  	for i = len(p.oe) - 1; i >= 0; i-- {
 205  		n := p.oe[i]
 206  		if n.Type != ElementNode {
 207  			break
 208  		}
 209  		switch n.DataAtom {
 210  		case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc:
 211  			for _, except := range exceptions {
 212  				if n.Data == except {
 213  					break loop
 214  				}
 215  			}
 216  			continue
 217  		}
 218  		break
 219  	}
 220  
 221  	p.oe = p.oe[:i+1]
 222  }
 223  
 224  // addChild adds a child node n to the top element, and pushes n onto the stack
 225  // of open elements if it is an element node.
 226  func (p *parser) addChild(n *Node) {
 227  	if p.shouldFosterParent() {
 228  		p.fosterParent(n)
 229  	} else {
 230  		p.top().AppendChild(n)
 231  	}
 232  
 233  	if n.Type == ElementNode {
 234  		p.insertOpenElement(n)
 235  	}
 236  }
 237  
 238  func (p *parser) insertOpenElement(n *Node) {
 239  	p.oe = append(p.oe, n)
 240  	if len(p.oe) > 512 {
 241  		panic("html: open stack of elements exceeds 512 nodes")
 242  	}
 243  }
 244  
 245  // shouldFosterParent returns whether the next node to be added should be
 246  // foster parented.
 247  func (p *parser) shouldFosterParent() bool {
 248  	if p.fosterParenting {
 249  		switch p.top().DataAtom {
 250  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
 251  			return true
 252  		}
 253  	}
 254  	return false
 255  }
 256  
 257  // fosterParent adds a child node according to the foster parenting rules.
 258  // Section 12.2.6.1, "foster parenting".
 259  func (p *parser) fosterParent(n *Node) {
 260  	var table, parent, prev, template *Node
 261  	var i int
 262  	for i = len(p.oe) - 1; i >= 0; i-- {
 263  		if p.oe[i].DataAtom == a.Table {
 264  			table = p.oe[i]
 265  			break
 266  		}
 267  	}
 268  
 269  	var j int
 270  	for j = len(p.oe) - 1; j >= 0; j-- {
 271  		if p.oe[j].DataAtom == a.Template {
 272  			template = p.oe[j]
 273  			break
 274  		}
 275  	}
 276  
 277  	if template != nil && (table == nil || j > i) {
 278  		template.AppendChild(n)
 279  		return
 280  	}
 281  
 282  	if table == nil {
 283  		// The foster parent is the html element.
 284  		parent = p.oe[0]
 285  	} else {
 286  		parent = table.Parent
 287  	}
 288  	if parent == nil {
 289  		parent = p.oe[i-1]
 290  	}
 291  
 292  	if table != nil {
 293  		prev = table.PrevSibling
 294  	} else {
 295  		prev = parent.LastChild
 296  	}
 297  	if prev != nil && prev.Type == TextNode && n.Type == TextNode {
 298  		prev.Data += n.Data
 299  		return
 300  	}
 301  
 302  	parent.InsertBefore(n, table)
 303  }
 304  
 305  // addText adds text to the preceding node if it is a text node, or else it
 306  // calls addChild with a new text node.
 307  func (p *parser) addText(text string) {
 308  	if text == "" {
 309  		return
 310  	}
 311  
 312  	if p.shouldFosterParent() {
 313  		p.fosterParent(&Node{
 314  			Type: TextNode,
 315  			Data: text,
 316  		})
 317  		return
 318  	}
 319  
 320  	t := p.top()
 321  	if n := t.LastChild; n != nil && n.Type == TextNode {
 322  		n.Data += text
 323  		return
 324  	}
 325  	p.addChild(&Node{
 326  		Type: TextNode,
 327  		Data: text,
 328  	})
 329  }
 330  
 331  // addElement adds a child element based on the current token.
 332  func (p *parser) addElement() {
 333  	p.addChild(&Node{
 334  		Type:     ElementNode,
 335  		DataAtom: p.tok.DataAtom,
 336  		Data:     p.tok.Data,
 337  		Attr:     p.tok.Attr,
 338  	})
 339  }
 340  
 341  // Section 12.2.4.3.
 342  func (p *parser) addFormattingElement() {
 343  	tagAtom, attr := p.tok.DataAtom, p.tok.Attr
 344  	p.addElement()
 345  
 346  	// Implement the Noah's Ark clause, but with three per family instead of two.
 347  	identicalElements := 0
 348  findIdenticalElements:
 349  	for i := len(p.afe) - 1; i >= 0; i-- {
 350  		n := p.afe[i]
 351  		if n.Type == scopeMarkerNode {
 352  			break
 353  		}
 354  		if n.Type != ElementNode {
 355  			continue
 356  		}
 357  		if n.Namespace != "" {
 358  			continue
 359  		}
 360  		if n.DataAtom != tagAtom {
 361  			continue
 362  		}
 363  		if len(n.Attr) != len(attr) {
 364  			continue
 365  		}
 366  	compareAttributes:
 367  		for _, t0 := range n.Attr {
 368  			for _, t1 := range attr {
 369  				if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
 370  					// Found a match for this attribute, continue with the next attribute.
 371  					continue compareAttributes
 372  				}
 373  			}
 374  			// If we get here, there is no attribute that matches a.
 375  			// Therefore the element is not identical to the new one.
 376  			continue findIdenticalElements
 377  		}
 378  
 379  		identicalElements++
 380  		if identicalElements >= 3 {
 381  			p.afe.remove(n)
 382  		}
 383  	}
 384  
 385  	p.afe = append(p.afe, p.top())
 386  }
 387  
 388  // Section 12.2.4.3.
 389  func (p *parser) clearActiveFormattingElements() {
 390  	for {
 391  		if n := p.afe.pop(); len(p.afe) == 0 || n.Type == scopeMarkerNode {
 392  			return
 393  		}
 394  	}
 395  }
 396  
 397  // Section 12.2.4.3.
 398  func (p *parser) reconstructActiveFormattingElements() {
 399  	n := p.afe.top()
 400  	if n == nil {
 401  		return
 402  	}
 403  	if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
 404  		return
 405  	}
 406  	i := len(p.afe) - 1
 407  	for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
 408  		if i == 0 {
 409  			i = -1
 410  			break
 411  		}
 412  		i--
 413  		n = p.afe[i]
 414  	}
 415  	for {
 416  		i++
 417  		clone := p.afe[i].clone()
 418  		p.addChild(clone)
 419  		p.afe[i] = clone
 420  		if i == len(p.afe)-1 {
 421  			break
 422  		}
 423  	}
 424  }
 425  
 426  // Section 12.2.5.
 427  func (p *parser) acknowledgeSelfClosingTag() {
 428  	p.hasSelfClosingToken = false
 429  }
 430  
 431  // An insertion mode (section 12.2.4.1) is the state transition function from
 432  // a particular state in the HTML5 parser's state machine. It updates the
 433  // parser's fields depending on parser.tok (where ErrorToken means EOF).
 434  // It returns whether the token was consumed.
 435  type insertionMode func(*parser) bool
 436  
 437  // setOriginalIM sets the insertion mode to return to after completing a text or
 438  // inTableText insertion mode.
 439  // Section 12.2.4.1, "using the rules for".
 440  func (p *parser) setOriginalIM() {
 441  	if p.originalIM != nil {
 442  		panic("html: bad parser state: originalIM was set twice")
 443  	}
 444  	p.originalIM = p.im
 445  }
 446  
 447  // Section 12.2.4.1, "reset the insertion mode".
 448  func (p *parser) resetInsertionMode() {
 449  	for i := len(p.oe) - 1; i >= 0; i-- {
 450  		n := p.oe[i]
 451  		last := i == 0
 452  		if last && p.context != nil {
 453  			n = p.context
 454  		}
 455  
 456  		switch n.DataAtom {
 457  		case a.Select:
 458  			if !last {
 459  				for ancestor, first := n, p.oe[0]; ancestor != first; {
 460  					ancestor = p.oe[p.oe.index(ancestor)-1]
 461  					switch ancestor.DataAtom {
 462  					case a.Template:
 463  						p.im = inSelectIM
 464  						return
 465  					case a.Table:
 466  						p.im = inSelectInTableIM
 467  						return
 468  					}
 469  				}
 470  			}
 471  			p.im = inSelectIM
 472  		case a.Td, a.Th:
 473  			// TODO: remove this divergence from the HTML5 spec.
 474  			//
 475  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 476  			p.im = inCellIM
 477  		case a.Tr:
 478  			p.im = inRowIM
 479  		case a.Tbody, a.Thead, a.Tfoot:
 480  			p.im = inTableBodyIM
 481  		case a.Caption:
 482  			p.im = inCaptionIM
 483  		case a.Colgroup:
 484  			p.im = inColumnGroupIM
 485  		case a.Table:
 486  			p.im = inTableIM
 487  		case a.Template:
 488  			// TODO: remove this divergence from the HTML5 spec.
 489  			if n.Namespace != "" {
 490  				continue
 491  			}
 492  			p.im = p.templateStack.top()
 493  		case a.Head:
 494  			// TODO: remove this divergence from the HTML5 spec.
 495  			//
 496  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 497  			p.im = inHeadIM
 498  		case a.Body:
 499  			p.im = inBodyIM
 500  		case a.Frameset:
 501  			p.im = inFramesetIM
 502  		case a.Html:
 503  			if p.head == nil {
 504  				p.im = beforeHeadIM
 505  			} else {
 506  				p.im = afterHeadIM
 507  			}
 508  		default:
 509  			if last {
 510  				p.im = inBodyIM
 511  				return
 512  			}
 513  			continue
 514  		}
 515  		return
 516  	}
 517  }
 518  
 519  const whitespace = " \t\r\n\f"
 520  
 521  // Section 12.2.6.4.1.
 522  func initialIM(p *parser) bool {
 523  	switch p.tok.Type {
 524  	case TextToken:
 525  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 526  		if len(p.tok.Data) == 0 {
 527  			// It was all whitespace, so ignore it.
 528  			return true
 529  		}
 530  	case CommentToken:
 531  		p.doc.AppendChild(&Node{
 532  			Type: CommentNode,
 533  			Data: p.tok.Data,
 534  		})
 535  		return true
 536  	case DoctypeToken:
 537  		n, quirks := parseDoctype(p.tok.Data)
 538  		p.doc.AppendChild(n)
 539  		p.quirks = quirks
 540  		p.im = beforeHTMLIM
 541  		return true
 542  	}
 543  	p.quirks = true
 544  	p.im = beforeHTMLIM
 545  	return false
 546  }
 547  
 548  // Section 12.2.6.4.2.
 549  func beforeHTMLIM(p *parser) bool {
 550  	switch p.tok.Type {
 551  	case DoctypeToken:
 552  		// Ignore the token.
 553  		return true
 554  	case TextToken:
 555  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 556  		if len(p.tok.Data) == 0 {
 557  			// It was all whitespace, so ignore it.
 558  			return true
 559  		}
 560  	case StartTagToken:
 561  		if p.tok.DataAtom == a.Html {
 562  			p.addElement()
 563  			p.im = beforeHeadIM
 564  			return true
 565  		}
 566  	case EndTagToken:
 567  		switch p.tok.DataAtom {
 568  		case a.Head, a.Body, a.Html, a.Br:
 569  			p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 570  			return false
 571  		default:
 572  			// Ignore the token.
 573  			return true
 574  		}
 575  	case CommentToken:
 576  		p.doc.AppendChild(&Node{
 577  			Type: CommentNode,
 578  			Data: p.tok.Data,
 579  		})
 580  		return true
 581  	}
 582  	p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 583  	return false
 584  }
 585  
 586  // Section 12.2.6.4.3.
 587  func beforeHeadIM(p *parser) bool {
 588  	switch p.tok.Type {
 589  	case TextToken:
 590  		p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 591  		if len(p.tok.Data) == 0 {
 592  			// It was all whitespace, so ignore it.
 593  			return true
 594  		}
 595  	case StartTagToken:
 596  		switch p.tok.DataAtom {
 597  		case a.Head:
 598  			p.addElement()
 599  			p.head = p.top()
 600  			p.im = inHeadIM
 601  			return true
 602  		case a.Html:
 603  			return inBodyIM(p)
 604  		}
 605  	case EndTagToken:
 606  		switch p.tok.DataAtom {
 607  		case a.Head, a.Body, a.Html, a.Br:
 608  			p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 609  			return false
 610  		default:
 611  			// Ignore the token.
 612  			return true
 613  		}
 614  	case CommentToken:
 615  		p.addChild(&Node{
 616  			Type: CommentNode,
 617  			Data: p.tok.Data,
 618  		})
 619  		return true
 620  	case DoctypeToken:
 621  		// Ignore the token.
 622  		return true
 623  	}
 624  
 625  	p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 626  	return false
 627  }
 628  
 629  // Section 12.2.6.4.4.
 630  func inHeadIM(p *parser) bool {
 631  	switch p.tok.Type {
 632  	case TextToken:
 633  		s := strings.TrimLeft(p.tok.Data, whitespace)
 634  		if len(s) < len(p.tok.Data) {
 635  			// Add the initial whitespace to the current node.
 636  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 637  			if s == "" {
 638  				return true
 639  			}
 640  			p.tok.Data = s
 641  		}
 642  	case StartTagToken:
 643  		switch p.tok.DataAtom {
 644  		case a.Html:
 645  			return inBodyIM(p)
 646  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta:
 647  			p.addElement()
 648  			p.oe.pop()
 649  			p.acknowledgeSelfClosingTag()
 650  			return true
 651  		case a.Noscript:
 652  			if p.scripting {
 653  				p.parseGenericRawTextElement()
 654  				return true
 655  			}
 656  			p.addElement()
 657  			p.im = inHeadNoscriptIM
 658  			// Don't let the tokenizer go into raw text mode when scripting is disabled.
 659  			p.tokenizer.NextIsNotRawText()
 660  			return true
 661  		case a.Script, a.Title:
 662  			p.addElement()
 663  			p.setOriginalIM()
 664  			p.im = textIM
 665  			return true
 666  		case a.Noframes, a.Style:
 667  			p.parseGenericRawTextElement()
 668  			return true
 669  		case a.Head:
 670  			// Ignore the token.
 671  			return true
 672  		case a.Template:
 673  			// TODO: remove this divergence from the HTML5 spec.
 674  			//
 675  			// We don't handle all of the corner cases when mixing foreign
 676  			// content (i.e. <math> or <svg>) with <template>. Without this
 677  			// early return, we can get into an infinite loop, possibly because
 678  			// of the "TODO... further divergence" a little below.
 679  			//
 680  			// As a workaround, if we are mixing foreign content and templates,
 681  			// just ignore the rest of the HTML. Foreign content is rare and a
 682  			// relatively old HTML feature. Templates are also rare and a
 683  			// relatively new HTML feature. Their combination is very rare.
 684  			for _, e := range p.oe {
 685  				if e.Namespace != "" {
 686  					p.im = ignoreTheRemainingTokens
 687  					return true
 688  				}
 689  			}
 690  
 691  			p.addElement()
 692  			p.afe = append(p.afe, &scopeMarker)
 693  			p.framesetOK = false
 694  			p.im = inTemplateIM
 695  			p.templateStack = append(p.templateStack, inTemplateIM)
 696  			return true
 697  		}
 698  	case EndTagToken:
 699  		switch p.tok.DataAtom {
 700  		case a.Head:
 701  			p.oe.pop()
 702  			p.im = afterHeadIM
 703  			return true
 704  		case a.Body, a.Html, a.Br:
 705  			p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 706  			return false
 707  		case a.Template:
 708  			if !p.oe.contains(a.Template) {
 709  				return true
 710  			}
 711  			// TODO: remove this further divergence from the HTML5 spec.
 712  			//
 713  			// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
 714  			p.generateImpliedEndTags()
 715  			for i := len(p.oe) - 1; i >= 0; i-- {
 716  				if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
 717  					p.oe = p.oe[:i]
 718  					break
 719  				}
 720  			}
 721  			p.clearActiveFormattingElements()
 722  			p.templateStack.pop()
 723  			p.resetInsertionMode()
 724  			return true
 725  		default:
 726  			// Ignore the token.
 727  			return true
 728  		}
 729  	case CommentToken:
 730  		p.addChild(&Node{
 731  			Type: CommentNode,
 732  			Data: p.tok.Data,
 733  		})
 734  		return true
 735  	case DoctypeToken:
 736  		// Ignore the token.
 737  		return true
 738  	}
 739  
 740  	p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 741  	return false
 742  }
 743  
 744  // Section 12.2.6.4.5.
 745  func inHeadNoscriptIM(p *parser) bool {
 746  	switch p.tok.Type {
 747  	case DoctypeToken:
 748  		// Ignore the token.
 749  		return true
 750  	case StartTagToken:
 751  		switch p.tok.DataAtom {
 752  		case a.Html:
 753  			return inBodyIM(p)
 754  		case a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Style:
 755  			return inHeadIM(p)
 756  		case a.Head:
 757  			// Ignore the token.
 758  			return true
 759  		case a.Noscript:
 760  			// Don't let the tokenizer go into raw text mode even when a <noscript>
 761  			// tag is in "in head noscript" insertion mode.
 762  			p.tokenizer.NextIsNotRawText()
 763  			// Ignore the token.
 764  			return true
 765  		}
 766  	case EndTagToken:
 767  		switch p.tok.DataAtom {
 768  		case a.Noscript, a.Br:
 769  		default:
 770  			// Ignore the token.
 771  			return true
 772  		}
 773  	case TextToken:
 774  		s := strings.TrimLeft(p.tok.Data, whitespace)
 775  		if len(s) == 0 {
 776  			// It was all whitespace.
 777  			return inHeadIM(p)
 778  		}
 779  	case CommentToken:
 780  		return inHeadIM(p)
 781  	}
 782  	p.oe.pop()
 783  	if p.top().DataAtom != a.Head {
 784  		panic("html: the new current node will be a head element.")
 785  	}
 786  	p.im = inHeadIM
 787  	if p.tok.DataAtom == a.Noscript {
 788  		return true
 789  	}
 790  	return false
 791  }
 792  
 793  // Section 12.2.6.4.6.
 794  func afterHeadIM(p *parser) bool {
 795  	switch p.tok.Type {
 796  	case TextToken:
 797  		s := strings.TrimLeft(p.tok.Data, whitespace)
 798  		if len(s) < len(p.tok.Data) {
 799  			// Add the initial whitespace to the current node.
 800  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 801  			if s == "" {
 802  				return true
 803  			}
 804  			p.tok.Data = s
 805  		}
 806  	case StartTagToken:
 807  		switch p.tok.DataAtom {
 808  		case a.Html:
 809  			return inBodyIM(p)
 810  		case a.Body:
 811  			p.addElement()
 812  			p.framesetOK = false
 813  			p.im = inBodyIM
 814  			return true
 815  		case a.Frameset:
 816  			p.addElement()
 817  			p.im = inFramesetIM
 818  			return true
 819  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 820  			p.insertOpenElement(p.head)
 821  			defer p.oe.remove(p.head)
 822  			return inHeadIM(p)
 823  		case a.Head:
 824  			// Ignore the token.
 825  			return true
 826  		}
 827  	case EndTagToken:
 828  		switch p.tok.DataAtom {
 829  		case a.Body, a.Html, a.Br:
 830  			// Drop down to creating an implied <body> tag.
 831  		case a.Template:
 832  			return inHeadIM(p)
 833  		default:
 834  			// Ignore the token.
 835  			return true
 836  		}
 837  	case CommentToken:
 838  		p.addChild(&Node{
 839  			Type: CommentNode,
 840  			Data: p.tok.Data,
 841  		})
 842  		return true
 843  	case DoctypeToken:
 844  		// Ignore the token.
 845  		return true
 846  	}
 847  
 848  	p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
 849  	p.framesetOK = true
 850  	if p.tok.Type == ErrorToken {
 851  		// Stop parsing.
 852  		return true
 853  	}
 854  	return false
 855  }
 856  
 857  // copyAttributes copies attributes of src not found on dst to dst.
 858  func copyAttributes(dst *Node, src Token) {
 859  	if len(src.Attr) == 0 {
 860  		return
 861  	}
 862  	attr := map[string]string{}
 863  	for _, t := range dst.Attr {
 864  		attr[t.Key] = t.Val
 865  	}
 866  	for _, t := range src.Attr {
 867  		if _, ok := attr[t.Key]; !ok {
 868  			dst.Attr = append(dst.Attr, t)
 869  			attr[t.Key] = t.Val
 870  		}
 871  	}
 872  }
 873  
 874  // Section 12.2.6.4.7.
 875  func inBodyIM(p *parser) bool {
 876  	switch p.tok.Type {
 877  	case TextToken:
 878  		d := p.tok.Data
 879  		switch n := p.oe.top(); n.DataAtom {
 880  		case a.Pre, a.Listing:
 881  			if n.FirstChild == nil {
 882  				// Ignore a newline at the start of a <pre> block.
 883  				if d != "" && d[0] == '\r' {
 884  					d = d[1:]
 885  				}
 886  				if d != "" && d[0] == '\n' {
 887  					d = d[1:]
 888  				}
 889  			}
 890  		}
 891  		d = strings.Replace(d, "\x00", "", -1)
 892  		if d == "" {
 893  			return true
 894  		}
 895  		p.reconstructActiveFormattingElements()
 896  		p.addText(d)
 897  		if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
 898  			// There were non-whitespace characters inserted.
 899  			p.framesetOK = false
 900  		}
 901  	case StartTagToken:
 902  		switch p.tok.DataAtom {
 903  		case a.Html:
 904  			if p.oe.contains(a.Template) {
 905  				return true
 906  			}
 907  			copyAttributes(p.oe[0], p.tok)
 908  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
 909  			return inHeadIM(p)
 910  		case a.Body:
 911  			if p.oe.contains(a.Template) {
 912  				return true
 913  			}
 914  			if len(p.oe) >= 2 {
 915  				body := p.oe[1]
 916  				if body.Type == ElementNode && body.DataAtom == a.Body {
 917  					p.framesetOK = false
 918  					copyAttributes(body, p.tok)
 919  				}
 920  			}
 921  		case a.Frameset:
 922  			if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
 923  				// Ignore the token.
 924  				return true
 925  			}
 926  			body := p.oe[1]
 927  			if body.Parent != nil {
 928  				body.Parent.RemoveChild(body)
 929  			}
 930  			p.oe = p.oe[:1]
 931  			p.addElement()
 932  			p.im = inFramesetIM
 933  			return true
 934  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Main, a.Menu, a.Nav, a.Ol, a.P, a.Search, a.Section, a.Summary, a.Ul:
 935  			p.popUntil(buttonScope, a.P)
 936  			p.addElement()
 937  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 938  			p.popUntil(buttonScope, a.P)
 939  			switch n := p.top(); n.DataAtom {
 940  			case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 941  				p.oe.pop()
 942  			}
 943  			p.addElement()
 944  		case a.Pre, a.Listing:
 945  			p.popUntil(buttonScope, a.P)
 946  			p.addElement()
 947  			// The newline, if any, will be dealt with by the TextToken case.
 948  			p.framesetOK = false
 949  		case a.Form:
 950  			if p.form != nil && !p.oe.contains(a.Template) {
 951  				// Ignore the token
 952  				return true
 953  			}
 954  			p.popUntil(buttonScope, a.P)
 955  			p.addElement()
 956  			if !p.oe.contains(a.Template) {
 957  				p.form = p.top()
 958  			}
 959  		case a.Li:
 960  			p.framesetOK = false
 961  			for i := len(p.oe) - 1; i >= 0; i-- {
 962  				node := p.oe[i]
 963  				switch node.DataAtom {
 964  				case a.Li:
 965  					p.oe = p.oe[:i]
 966  				case a.Address, a.Div, a.P:
 967  					continue
 968  				default:
 969  					if !isSpecialElement(node) {
 970  						continue
 971  					}
 972  				}
 973  				break
 974  			}
 975  			p.popUntil(buttonScope, a.P)
 976  			p.addElement()
 977  		case a.Dd, a.Dt:
 978  			p.framesetOK = false
 979  			for i := len(p.oe) - 1; i >= 0; i-- {
 980  				node := p.oe[i]
 981  				switch node.DataAtom {
 982  				case a.Dd, a.Dt:
 983  					p.oe = p.oe[:i]
 984  				case a.Address, a.Div, a.P:
 985  					continue
 986  				default:
 987  					if !isSpecialElement(node) {
 988  						continue
 989  					}
 990  				}
 991  				break
 992  			}
 993  			p.popUntil(buttonScope, a.P)
 994  			p.addElement()
 995  		case a.Plaintext:
 996  			p.popUntil(buttonScope, a.P)
 997  			p.addElement()
 998  		case a.Button:
 999  			p.popUntil(defaultScope, a.Button)
1000  			p.reconstructActiveFormattingElements()
1001  			p.addElement()
1002  			p.framesetOK = false
1003  		case a.A:
1004  			for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
1005  				if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
1006  					p.inBodyEndTagFormatting(a.A, "a")
1007  					p.oe.remove(n)
1008  					p.afe.remove(n)
1009  					break
1010  				}
1011  			}
1012  			p.reconstructActiveFormattingElements()
1013  			p.addFormattingElement()
1014  		case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1015  			p.reconstructActiveFormattingElements()
1016  			p.addFormattingElement()
1017  		case a.Nobr:
1018  			p.reconstructActiveFormattingElements()
1019  			if p.elementInScope(defaultScope, a.Nobr) {
1020  				p.inBodyEndTagFormatting(a.Nobr, "nobr")
1021  				p.reconstructActiveFormattingElements()
1022  			}
1023  			p.addFormattingElement()
1024  		case a.Applet, a.Marquee, a.Object:
1025  			p.reconstructActiveFormattingElements()
1026  			p.addElement()
1027  			p.afe = append(p.afe, &scopeMarker)
1028  			p.framesetOK = false
1029  		case a.Table:
1030  			if !p.quirks {
1031  				p.popUntil(buttonScope, a.P)
1032  			}
1033  			p.addElement()
1034  			p.framesetOK = false
1035  			p.im = inTableIM
1036  			return true
1037  		case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
1038  			p.reconstructActiveFormattingElements()
1039  			p.addElement()
1040  			p.oe.pop()
1041  			p.acknowledgeSelfClosingTag()
1042  			if p.tok.DataAtom == a.Input {
1043  				for _, t := range p.tok.Attr {
1044  					if t.Key == "type" {
1045  						if strings.EqualFold(t.Val, "hidden") {
1046  							// Skip setting framesetOK = false
1047  							return true
1048  						}
1049  					}
1050  				}
1051  			}
1052  			p.framesetOK = false
1053  		case a.Param, a.Source, a.Track:
1054  			p.addElement()
1055  			p.oe.pop()
1056  			p.acknowledgeSelfClosingTag()
1057  		case a.Hr:
1058  			p.popUntil(buttonScope, a.P)
1059  			p.addElement()
1060  			p.oe.pop()
1061  			p.acknowledgeSelfClosingTag()
1062  			p.framesetOK = false
1063  		case a.Image:
1064  			p.tok.DataAtom = a.Img
1065  			p.tok.Data = a.Img.String()
1066  			return false
1067  		case a.Textarea:
1068  			p.addElement()
1069  			p.setOriginalIM()
1070  			p.framesetOK = false
1071  			p.im = textIM
1072  		case a.Xmp:
1073  			p.popUntil(buttonScope, a.P)
1074  			p.reconstructActiveFormattingElements()
1075  			p.framesetOK = false
1076  			p.parseGenericRawTextElement()
1077  		case a.Iframe:
1078  			p.framesetOK = false
1079  			p.parseGenericRawTextElement()
1080  		case a.Noembed:
1081  			p.parseGenericRawTextElement()
1082  		case a.Noscript:
1083  			if p.scripting {
1084  				p.parseGenericRawTextElement()
1085  				return true
1086  			}
1087  			p.reconstructActiveFormattingElements()
1088  			p.addElement()
1089  			// Don't let the tokenizer go into raw text mode when scripting is disabled.
1090  			p.tokenizer.NextIsNotRawText()
1091  		case a.Select:
1092  			p.reconstructActiveFormattingElements()
1093  			p.addElement()
1094  			p.framesetOK = false
1095  			p.im = inSelectIM
1096  			return true
1097  		case a.Optgroup, a.Option:
1098  			if p.top().DataAtom == a.Option {
1099  				p.oe.pop()
1100  			}
1101  			p.reconstructActiveFormattingElements()
1102  			p.addElement()
1103  		case a.Rb, a.Rtc:
1104  			if p.elementInScope(defaultScope, a.Ruby) {
1105  				p.generateImpliedEndTags()
1106  			}
1107  			p.addElement()
1108  		case a.Rp, a.Rt:
1109  			if p.elementInScope(defaultScope, a.Ruby) {
1110  				p.generateImpliedEndTags("rtc")
1111  			}
1112  			p.addElement()
1113  		case a.Math, a.Svg:
1114  			p.reconstructActiveFormattingElements()
1115  			if p.tok.DataAtom == a.Math {
1116  				adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1117  			} else {
1118  				adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1119  			}
1120  			adjustForeignAttributes(p.tok.Attr)
1121  			p.addElement()
1122  			p.top().Namespace = p.tok.Data
1123  			if p.hasSelfClosingToken {
1124  				p.oe.pop()
1125  				p.acknowledgeSelfClosingTag()
1126  			}
1127  			return true
1128  		case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1129  			// Ignore the token.
1130  		default:
1131  			p.reconstructActiveFormattingElements()
1132  			p.addElement()
1133  		}
1134  	case EndTagToken:
1135  		switch p.tok.DataAtom {
1136  		case a.Body:
1137  			if p.elementInScope(defaultScope, a.Body) {
1138  				p.im = afterBodyIM
1139  			}
1140  		case a.Html:
1141  			if p.elementInScope(defaultScope, a.Body) {
1142  				p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
1143  				return false
1144  			}
1145  			return true
1146  		case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dialog, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Main, a.Menu, a.Nav, a.Ol, a.Pre, a.Search, a.Section, a.Summary, a.Ul:
1147  			p.popUntil(defaultScope, p.tok.DataAtom)
1148  		case a.Form:
1149  			if p.oe.contains(a.Template) {
1150  				i := p.indexOfElementInScope(defaultScope, a.Form)
1151  				if i == -1 {
1152  					// Ignore the token.
1153  					return true
1154  				}
1155  				p.generateImpliedEndTags()
1156  				if p.oe[i].DataAtom != a.Form {
1157  					// Ignore the token.
1158  					return true
1159  				}
1160  				p.popUntil(defaultScope, a.Form)
1161  			} else {
1162  				node := p.form
1163  				p.form = nil
1164  				i := p.indexOfElementInScope(defaultScope, a.Form)
1165  				if node == nil || i == -1 || p.oe[i] != node {
1166  					// Ignore the token.
1167  					return true
1168  				}
1169  				p.generateImpliedEndTags()
1170  				p.oe.remove(node)
1171  			}
1172  		case a.P:
1173  			if !p.elementInScope(buttonScope, a.P) {
1174  				p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1175  			}
1176  			p.popUntil(buttonScope, a.P)
1177  		case a.Li:
1178  			p.popUntil(listItemScope, a.Li)
1179  		case a.Dd, a.Dt:
1180  			p.popUntil(defaultScope, p.tok.DataAtom)
1181  		case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1182  			p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1183  		case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1184  			p.inBodyEndTagFormatting(p.tok.DataAtom, p.tok.Data)
1185  		case a.Applet, a.Marquee, a.Object:
1186  			if p.popUntil(defaultScope, p.tok.DataAtom) {
1187  				p.clearActiveFormattingElements()
1188  			}
1189  		case a.Br:
1190  			p.tok.Type = StartTagToken
1191  			return false
1192  		case a.Template:
1193  			return inHeadIM(p)
1194  		default:
1195  			p.inBodyEndTagOther(p.tok.DataAtom, p.tok.Data)
1196  		}
1197  	case CommentToken:
1198  		p.addChild(&Node{
1199  			Type: CommentNode,
1200  			Data: p.tok.Data,
1201  		})
1202  	case ErrorToken:
1203  		// TODO: remove this divergence from the HTML5 spec.
1204  		if len(p.templateStack) > 0 {
1205  			p.im = inTemplateIM
1206  			return false
1207  		}
1208  		for _, e := range p.oe {
1209  			switch e.DataAtom {
1210  			case a.Dd, a.Dt, a.Li, a.Optgroup, a.Option, a.P, a.Rb, a.Rp, a.Rt, a.Rtc, a.Tbody, a.Td, a.Tfoot, a.Th,
1211  				a.Thead, a.Tr, a.Body, a.Html:
1212  			default:
1213  				return true
1214  			}
1215  		}
1216  	}
1217  
1218  	return true
1219  }
1220  
// inBodyEndTagFormatting runs the "adoption agency" algorithm for the
// formatting element identified by tagAtom and tagName.
//
// tagAtom is matched against the DataAtom of entries in the list of active
// formatting elements (p.afe). tagName is matched against the Data of the
// current node and is forwarded to inBodyEndTagOther when no matching
// formatting element is found.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom, tagName string) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-2. If the current node has the given tag name and is not in the
	// list of active formatting elements, popping it is all that is needed.
	if current := p.oe.top(); current.Data == tagName && p.afe.index(current) == -1 {
		p.oe.pop()
		return
	}

	// Steps 3-5. The outer loop. It runs at most eight times.
	for i := 0; i < 8; i++ {
		// Step 6. Find the formatting element: the last entry of p.afe after
		// the most recent scope marker whose DataAtom equals tagAtom.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: fall back to the generic end tag
			// handling.
			p.inBodyEndTagOther(tagAtom, tagName)
			return
		}

		// Step 7. Ignore the tag if formatting element is not in the stack of open elements.
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			p.afe.remove(formattingElement)
			return
		}
		// Step 8. Ignore the tag if formatting element is not in the scope.
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Step 9. This step is omitted because it's just a parse error but no need to return.

		// Steps 10-11. Find the furthest block: the first element at or after
		// feIndex on the stack for which isSpecialElement reports true.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// There is no furthest block: pop the stack up to and including
			// the formatting element, then drop it from p.afe.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 12-13. Find the common ancestor and bookmark node.
		// The common ancestor is the stack entry immediately below the
		// formatting element; bookmark is an index into p.afe.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 14. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Step 14.1.
		j := 0
		for {
			// Step 14.2.
			j++
			// Step. 14.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 14.4. Go to the next step if node is formatting element.
			if node == formattingElement {
				break
			}
			// Step 14.5. Remove node from the list of active formatting elements if
			// inner loop counter is greater than three and node is in the list of
			// active formatting elements.
			if ni := p.afe.index(node); j > 3 && ni > -1 {
				p.afe.remove(node)
				// If any element of the list of active formatting elements is removed,
				// we need to take care whether bookmark should be decremented or not.
				// This is because the value of bookmark may exceed the size of the
				// list by removing elements from the list.
				if ni <= bookmark {
					bookmark--
				}
				continue
			}
			// Step 14.6. Continue the next inner loop if node is not in the list of
			// active formatting elements.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 14.7. Replace node with a clone of itself in both p.afe and
			// p.oe, and continue with the clone.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 14.8. On the first reparenting, move the bookmark to just
			// after the clone in p.afe.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 14.9. Detach lastNode from its current parent, if any, and
			// make it a child of node.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 14.10.
			lastNode = node
		}

		// Step 15. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 16-18. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 19. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 20. Fix up the stack of open elements: the clone takes the
		// slot directly after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1373  
1374  // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1375  // "Any other end tag" handling from 12.2.6.5 The rules for parsing tokens in foreign content
1376  // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1377  func (p *parser) inBodyEndTagOther(tagAtom a.Atom, tagName string) {
1378  	for i := len(p.oe) - 1; i >= 0; i-- {
1379  		// Two element nodes have the same tag if they have the same Data (a
1380  		// string-typed field). As an optimization, for common HTML tags, each
1381  		// Data string is assigned a unique, non-zero DataAtom (a uint32-typed
1382  		// field), since integer comparison is faster than string comparison.
1383  		// Uncommon (custom) tags get a zero DataAtom.
1384  		//
1385  		// The if condition here is equivalent to (p.oe[i].Data == tagName).
1386  		if (p.oe[i].DataAtom == tagAtom) &&
1387  			((tagAtom != 0) || (p.oe[i].Data == tagName)) {
1388  			p.oe = p.oe[:i]
1389  			break
1390  		}
1391  		if isSpecialElement(p.oe[i]) {
1392  			break
1393  		}
1394  	}
1395  }
1396  
1397  // Section 12.2.6.4.8.
1398  func textIM(p *parser) bool {
1399  	switch p.tok.Type {
1400  	case ErrorToken:
1401  		p.oe.pop()
1402  	case TextToken:
1403  		d := p.tok.Data
1404  		if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1405  			// Ignore a newline at the start of a <textarea> block.
1406  			if d != "" && d[0] == '\r' {
1407  				d = d[1:]
1408  			}
1409  			if d != "" && d[0] == '\n' {
1410  				d = d[1:]
1411  			}
1412  		}
1413  		if d == "" {
1414  			return true
1415  		}
1416  		p.addText(d)
1417  		return true
1418  	case EndTagToken:
1419  		p.oe.pop()
1420  	}
1421  	p.im = p.originalIM
1422  	p.originalIM = nil
1423  	return p.tok.Type == EndTagToken
1424  }
1425  
1426  // Section 12.2.6.4.9.
1427  func inTableIM(p *parser) bool {
1428  	switch p.tok.Type {
1429  	case TextToken:
1430  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1431  		switch p.oe.top().DataAtom {
1432  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1433  			if strings.Trim(p.tok.Data, whitespace) == "" {
1434  				p.addText(p.tok.Data)
1435  				return true
1436  			}
1437  		}
1438  	case StartTagToken:
1439  		switch p.tok.DataAtom {
1440  		case a.Caption:
1441  			p.clearStackToContext(tableScope)
1442  			p.afe = append(p.afe, &scopeMarker)
1443  			p.addElement()
1444  			p.im = inCaptionIM
1445  			return true
1446  		case a.Colgroup:
1447  			p.clearStackToContext(tableScope)
1448  			p.addElement()
1449  			p.im = inColumnGroupIM
1450  			return true
1451  		case a.Col:
1452  			p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1453  			return false
1454  		case a.Tbody, a.Tfoot, a.Thead:
1455  			p.clearStackToContext(tableScope)
1456  			p.addElement()
1457  			p.im = inTableBodyIM
1458  			return true
1459  		case a.Td, a.Th, a.Tr:
1460  			p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1461  			return false
1462  		case a.Table:
1463  			if p.popUntil(tableScope, a.Table) {
1464  				p.resetInsertionMode()
1465  				return false
1466  			}
1467  			// Ignore the token.
1468  			return true
1469  		case a.Style, a.Script, a.Template:
1470  			return inHeadIM(p)
1471  		case a.Input:
1472  			for _, t := range p.tok.Attr {
1473  				if t.Key == "type" && strings.EqualFold(t.Val, "hidden") {
1474  					p.addElement()
1475  					p.oe.pop()
1476  					return true
1477  				}
1478  			}
1479  			// Otherwise drop down to the default action.
1480  		case a.Form:
1481  			if p.oe.contains(a.Template) || p.form != nil {
1482  				// Ignore the token.
1483  				return true
1484  			}
1485  			p.addElement()
1486  			p.form = p.oe.pop()
1487  		case a.Select:
1488  			p.reconstructActiveFormattingElements()
1489  			switch p.top().DataAtom {
1490  			case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1491  				p.fosterParenting = true
1492  			}
1493  			p.addElement()
1494  			p.fosterParenting = false
1495  			p.framesetOK = false
1496  			p.im = inSelectInTableIM
1497  			return true
1498  		}
1499  	case EndTagToken:
1500  		switch p.tok.DataAtom {
1501  		case a.Table:
1502  			if p.popUntil(tableScope, a.Table) {
1503  				p.resetInsertionMode()
1504  				return true
1505  			}
1506  			// Ignore the token.
1507  			return true
1508  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1509  			// Ignore the token.
1510  			return true
1511  		case a.Template:
1512  			return inHeadIM(p)
1513  		}
1514  	case CommentToken:
1515  		p.addChild(&Node{
1516  			Type: CommentNode,
1517  			Data: p.tok.Data,
1518  		})
1519  		return true
1520  	case DoctypeToken:
1521  		// Ignore the token.
1522  		return true
1523  	case ErrorToken:
1524  		return inBodyIM(p)
1525  	}
1526  
1527  	p.fosterParenting = true
1528  	defer func() { p.fosterParenting = false }()
1529  
1530  	return inBodyIM(p)
1531  }
1532  
1533  // Section 12.2.6.4.11.
1534  func inCaptionIM(p *parser) bool {
1535  	switch p.tok.Type {
1536  	case StartTagToken:
1537  		switch p.tok.DataAtom {
1538  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1539  			if !p.popUntil(tableScope, a.Caption) {
1540  				// Ignore the token.
1541  				return true
1542  			}
1543  			p.clearActiveFormattingElements()
1544  			p.im = inTableIM
1545  			return false
1546  		case a.Select:
1547  			p.reconstructActiveFormattingElements()
1548  			p.addElement()
1549  			p.framesetOK = false
1550  			p.im = inSelectInTableIM
1551  			return true
1552  		}
1553  	case EndTagToken:
1554  		switch p.tok.DataAtom {
1555  		case a.Caption:
1556  			if p.popUntil(tableScope, a.Caption) {
1557  				p.clearActiveFormattingElements()
1558  				p.im = inTableIM
1559  			}
1560  			return true
1561  		case a.Table:
1562  			if !p.popUntil(tableScope, a.Caption) {
1563  				// Ignore the token.
1564  				return true
1565  			}
1566  			p.clearActiveFormattingElements()
1567  			p.im = inTableIM
1568  			return false
1569  		case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1570  			// Ignore the token.
1571  			return true
1572  		}
1573  	}
1574  	return inBodyIM(p)
1575  }
1576  
1577  // Section 12.2.6.4.12.
1578  func inColumnGroupIM(p *parser) bool {
1579  	switch p.tok.Type {
1580  	case TextToken:
1581  		s := strings.TrimLeft(p.tok.Data, whitespace)
1582  		if len(s) < len(p.tok.Data) {
1583  			// Add the initial whitespace to the current node.
1584  			p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1585  			if s == "" {
1586  				return true
1587  			}
1588  			p.tok.Data = s
1589  		}
1590  	case CommentToken:
1591  		p.addChild(&Node{
1592  			Type: CommentNode,
1593  			Data: p.tok.Data,
1594  		})
1595  		return true
1596  	case DoctypeToken:
1597  		// Ignore the token.
1598  		return true
1599  	case StartTagToken:
1600  		switch p.tok.DataAtom {
1601  		case a.Html:
1602  			return inBodyIM(p)
1603  		case a.Col:
1604  			p.addElement()
1605  			p.oe.pop()
1606  			p.acknowledgeSelfClosingTag()
1607  			return true
1608  		case a.Template:
1609  			return inHeadIM(p)
1610  		}
1611  	case EndTagToken:
1612  		switch p.tok.DataAtom {
1613  		case a.Colgroup:
1614  			if p.oe.top().DataAtom == a.Colgroup {
1615  				p.oe.pop()
1616  				p.im = inTableIM
1617  			}
1618  			return true
1619  		case a.Col:
1620  			// Ignore the token.
1621  			return true
1622  		case a.Template:
1623  			return inHeadIM(p)
1624  		}
1625  	case ErrorToken:
1626  		return inBodyIM(p)
1627  	}
1628  	if p.oe.top().DataAtom != a.Colgroup {
1629  		return true
1630  	}
1631  	p.oe.pop()
1632  	p.im = inTableIM
1633  	return false
1634  }
1635  
1636  // Section 12.2.6.4.13.
1637  func inTableBodyIM(p *parser) bool {
1638  	switch p.tok.Type {
1639  	case StartTagToken:
1640  		switch p.tok.DataAtom {
1641  		case a.Tr:
1642  			p.clearStackToContext(tableBodyScope)
1643  			p.addElement()
1644  			p.im = inRowIM
1645  			return true
1646  		case a.Td, a.Th:
1647  			p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1648  			return false
1649  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1650  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1651  				p.im = inTableIM
1652  				return false
1653  			}
1654  			// Ignore the token.
1655  			return true
1656  		}
1657  	case EndTagToken:
1658  		switch p.tok.DataAtom {
1659  		case a.Tbody, a.Tfoot, a.Thead:
1660  			if p.elementInScope(tableScope, p.tok.DataAtom) {
1661  				p.clearStackToContext(tableBodyScope)
1662  				p.oe.pop()
1663  				p.im = inTableIM
1664  			}
1665  			return true
1666  		case a.Table:
1667  			if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1668  				p.im = inTableIM
1669  				return false
1670  			}
1671  			// Ignore the token.
1672  			return true
1673  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1674  			// Ignore the token.
1675  			return true
1676  		}
1677  	case CommentToken:
1678  		p.addChild(&Node{
1679  			Type: CommentNode,
1680  			Data: p.tok.Data,
1681  		})
1682  		return true
1683  	}
1684  
1685  	return inTableIM(p)
1686  }
1687  
1688  // Section 13.2.6.4.14.
1689  func inRowIM(p *parser) bool {
1690  	switch p.tok.Type {
1691  	case StartTagToken:
1692  		switch p.tok.DataAtom {
1693  		case a.Td, a.Th:
1694  			p.clearStackToContext(tableRowScope)
1695  			p.addElement()
1696  			p.afe = append(p.afe, &scopeMarker)
1697  			p.im = inCellIM
1698  			return true
1699  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1700  			if p.elementInScope(tableScope, a.Tr) {
1701  				p.clearStackToContext(tableRowScope)
1702  				p.oe.pop()
1703  				p.im = inTableBodyIM
1704  				return false
1705  			}
1706  			// Ignore the token.
1707  			return true
1708  		}
1709  	case EndTagToken:
1710  		switch p.tok.DataAtom {
1711  		case a.Tr:
1712  			if p.elementInScope(tableScope, a.Tr) {
1713  				p.clearStackToContext(tableRowScope)
1714  				p.oe.pop()
1715  				p.im = inTableBodyIM
1716  				return true
1717  			}
1718  			// Ignore the token.
1719  			return true
1720  		case a.Table:
1721  			if p.elementInScope(tableScope, a.Tr) {
1722  				p.clearStackToContext(tableRowScope)
1723  				p.oe.pop()
1724  				p.im = inTableBodyIM
1725  				return false
1726  			}
1727  			// Ignore the token.
1728  			return true
1729  		case a.Tbody, a.Tfoot, a.Thead:
1730  			if p.elementInScope(tableScope, p.tok.DataAtom) && p.elementInScope(tableScope, a.Tr) {
1731  				p.clearStackToContext(tableRowScope)
1732  				p.oe.pop()
1733  				p.im = inTableBodyIM
1734  				return false
1735  			}
1736  			// Ignore the token.
1737  			return true
1738  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1739  			// Ignore the token.
1740  			return true
1741  		}
1742  	}
1743  
1744  	return inTableIM(p)
1745  }
1746  
1747  // Section 12.2.6.4.15.
1748  func inCellIM(p *parser) bool {
1749  	switch p.tok.Type {
1750  	case StartTagToken:
1751  		switch p.tok.DataAtom {
1752  		case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1753  			if p.popUntil(tableScope, a.Td, a.Th) {
1754  				// Close the cell and reprocess.
1755  				p.clearActiveFormattingElements()
1756  				p.im = inRowIM
1757  				return false
1758  			}
1759  			// Ignore the token.
1760  			return true
1761  		case a.Select:
1762  			p.reconstructActiveFormattingElements()
1763  			p.addElement()
1764  			p.framesetOK = false
1765  			p.im = inSelectInTableIM
1766  			return true
1767  		}
1768  	case EndTagToken:
1769  		switch p.tok.DataAtom {
1770  		case a.Td, a.Th:
1771  			if !p.popUntil(tableScope, p.tok.DataAtom) {
1772  				// Ignore the token.
1773  				return true
1774  			}
1775  			p.clearActiveFormattingElements()
1776  			p.im = inRowIM
1777  			return true
1778  		case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1779  			// Ignore the token.
1780  			return true
1781  		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1782  			if !p.elementInScope(tableScope, p.tok.DataAtom) {
1783  				// Ignore the token.
1784  				return true
1785  			}
1786  			// Close the cell and reprocess.
1787  			if p.popUntil(tableScope, a.Td, a.Th) {
1788  				p.clearActiveFormattingElements()
1789  			}
1790  			p.im = inRowIM
1791  			return false
1792  		}
1793  	}
1794  	return inBodyIM(p)
1795  }
1796  
1797  // Section 12.2.6.4.16.
1798  func inSelectIM(p *parser) bool {
1799  	switch p.tok.Type {
1800  	case TextToken:
1801  		p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1802  	case StartTagToken:
1803  		switch p.tok.DataAtom {
1804  		case a.Html:
1805  			return inBodyIM(p)
1806  		case a.Option:
1807  			if p.top().DataAtom == a.Option {
1808  				p.oe.pop()
1809  			}
1810  			p.addElement()
1811  		case a.Optgroup:
1812  			if p.top().DataAtom == a.Option {
1813  				p.oe.pop()
1814  			}
1815  			if p.top().DataAtom == a.Optgroup {
1816  				p.oe.pop()
1817  			}
1818  			p.addElement()
1819  		case a.Select:
1820  			if !p.popUntil(selectScope, a.Select) {
1821  				// Ignore the token.
1822  				return true
1823  			}
1824  			p.resetInsertionMode()
1825  		case a.Input, a.Keygen, a.Textarea:
1826  			if p.elementInScope(selectScope, a.Select) {
1827  				p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1828  				return false
1829  			}
1830  			// In order to properly ignore <textarea>, we need to change the tokenizer mode.
1831  			p.tokenizer.NextIsNotRawText()
1832  			// Ignore the token.
1833  			return true
1834  		case a.Script, a.Template:
1835  			return inHeadIM(p)
1836  		case a.Iframe, a.Noembed, a.Noframes, a.Noscript, a.Plaintext, a.Style, a.Title, a.Xmp:
1837  			// Don't let the tokenizer go into raw text mode when there are raw tags
1838  			// to be ignored. These tags should be ignored from the tokenizer
1839  			// properly.
1840  			p.tokenizer.NextIsNotRawText()
1841  			// Ignore the token.
1842  			return true
1843  		}
1844  	case EndTagToken:
1845  		switch p.tok.DataAtom {
1846  		case a.Option:
1847  			if p.top().DataAtom == a.Option {
1848  				p.oe.pop()
1849  			}
1850  		case a.Optgroup:
1851  			i := len(p.oe) - 1
1852  			if p.oe[i].DataAtom == a.Option {
1853  				i--
1854  			}
1855  			if p.oe[i].DataAtom == a.Optgroup {
1856  				p.oe = p.oe[:i]
1857  			}
1858  		case a.Select:
1859  			if !p.popUntil(selectScope, a.Select) {
1860  				// Ignore the token.
1861  				return true
1862  			}
1863  			p.resetInsertionMode()
1864  		case a.Template:
1865  			return inHeadIM(p)
1866  		}
1867  	case CommentToken:
1868  		p.addChild(&Node{
1869  			Type: CommentNode,
1870  			Data: p.tok.Data,
1871  		})
1872  	case DoctypeToken:
1873  		// Ignore the token.
1874  		return true
1875  	case ErrorToken:
1876  		return inBodyIM(p)
1877  	}
1878  
1879  	return true
1880  }
1881  
1882  // Section 12.2.6.4.17.
1883  func inSelectInTableIM(p *parser) bool {
1884  	switch p.tok.Type {
1885  	case StartTagToken, EndTagToken:
1886  		switch p.tok.DataAtom {
1887  		case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1888  			if p.tok.Type == EndTagToken && !p.elementInScope(tableScope, p.tok.DataAtom) {
1889  				// Ignore the token.
1890  				return true
1891  			}
1892  			// This is like p.popUntil(selectScope, a.Select), but it also
1893  			// matches <math select>, not just <select>. Matching the MathML
1894  			// tag is arguably incorrect (conceptually), but it mimics what
1895  			// Chromium does.
1896  			for i := len(p.oe) - 1; i >= 0; i-- {
1897  				if n := p.oe[i]; n.DataAtom == a.Select {
1898  					p.oe = p.oe[:i]
1899  					break
1900  				}
1901  			}
1902  			p.resetInsertionMode()
1903  			return false
1904  		}
1905  	}
1906  	return inSelectIM(p)
1907  }
1908  
1909  // Section 12.2.6.4.18.
1910  func inTemplateIM(p *parser) bool {
1911  	switch p.tok.Type {
1912  	case TextToken, CommentToken, DoctypeToken:
1913  		return inBodyIM(p)
1914  	case StartTagToken:
1915  		switch p.tok.DataAtom {
1916  		case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Template, a.Title:
1917  			return inHeadIM(p)
1918  		case a.Caption, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1919  			p.templateStack.pop()
1920  			p.templateStack = append(p.templateStack, inTableIM)
1921  			p.im = inTableIM
1922  			return false
1923  		case a.Col:
1924  			p.templateStack.pop()
1925  			p.templateStack = append(p.templateStack, inColumnGroupIM)
1926  			p.im = inColumnGroupIM
1927  			return false
1928  		case a.Tr:
1929  			p.templateStack.pop()
1930  			p.templateStack = append(p.templateStack, inTableBodyIM)
1931  			p.im = inTableBodyIM
1932  			return false
1933  		case a.Td, a.Th:
1934  			p.templateStack.pop()
1935  			p.templateStack = append(p.templateStack, inRowIM)
1936  			p.im = inRowIM
1937  			return false
1938  		default:
1939  			p.templateStack.pop()
1940  			p.templateStack = append(p.templateStack, inBodyIM)
1941  			p.im = inBodyIM
1942  			return false
1943  		}
1944  	case EndTagToken:
1945  		switch p.tok.DataAtom {
1946  		case a.Template:
1947  			return inHeadIM(p)
1948  		default:
1949  			// Ignore the token.
1950  			return true
1951  		}
1952  	case ErrorToken:
1953  		if !p.oe.contains(a.Template) {
1954  			// Ignore the token.
1955  			return true
1956  		}
1957  		// TODO: remove this divergence from the HTML5 spec.
1958  		//
1959  		// See https://bugs.chromium.org/p/chromium/issues/detail?id=829668
1960  		p.generateImpliedEndTags()
1961  		for i := len(p.oe) - 1; i >= 0; i-- {
1962  			if n := p.oe[i]; n.Namespace == "" && n.DataAtom == a.Template {
1963  				p.oe = p.oe[:i]
1964  				break
1965  			}
1966  		}
1967  		p.clearActiveFormattingElements()
1968  		p.templateStack.pop()
1969  		p.resetInsertionMode()
1970  		return false
1971  	}
1972  	return false
1973  }
1974  
1975  // Section 12.2.6.4.19.
1976  func afterBodyIM(p *parser) bool {
1977  	switch p.tok.Type {
1978  	case ErrorToken:
1979  		// Stop parsing.
1980  		return true
1981  	case TextToken:
1982  		s := strings.TrimLeft(p.tok.Data, whitespace)
1983  		if len(s) == 0 {
1984  			// It was all whitespace.
1985  			return inBodyIM(p)
1986  		}
1987  	case StartTagToken:
1988  		if p.tok.DataAtom == a.Html {
1989  			return inBodyIM(p)
1990  		}
1991  	case EndTagToken:
1992  		if p.tok.DataAtom == a.Html {
1993  			if !p.fragment {
1994  				p.im = afterAfterBodyIM
1995  			}
1996  			return true
1997  		}
1998  	case CommentToken:
1999  		// The comment is attached to the <html> element.
2000  		if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
2001  			panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
2002  		}
2003  		p.oe[0].AppendChild(&Node{
2004  			Type: CommentNode,
2005  			Data: p.tok.Data,
2006  		})
2007  		return true
2008  	}
2009  	p.im = inBodyIM
2010  	return false
2011  }
2012  
2013  // Section 12.2.6.4.20.
2014  func inFramesetIM(p *parser) bool {
2015  	switch p.tok.Type {
2016  	case CommentToken:
2017  		p.addChild(&Node{
2018  			Type: CommentNode,
2019  			Data: p.tok.Data,
2020  		})
2021  	case TextToken:
2022  		// Ignore all text but whitespace.
2023  		s := strings.Map(func(c rune) rune {
2024  			switch c {
2025  			case ' ', '\t', '\n', '\f', '\r':
2026  				return c
2027  			}
2028  			return -1
2029  		}, p.tok.Data)
2030  		if s != "" {
2031  			p.addText(s)
2032  		}
2033  	case StartTagToken:
2034  		switch p.tok.DataAtom {
2035  		case a.Html:
2036  			return inBodyIM(p)
2037  		case a.Frameset:
2038  			p.addElement()
2039  		case a.Frame:
2040  			p.addElement()
2041  			p.oe.pop()
2042  			p.acknowledgeSelfClosingTag()
2043  		case a.Noframes:
2044  			return inHeadIM(p)
2045  		}
2046  	case EndTagToken:
2047  		switch p.tok.DataAtom {
2048  		case a.Frameset:
2049  			if p.oe.top().DataAtom != a.Html {
2050  				p.oe.pop()
2051  				if p.oe.top().DataAtom != a.Frameset {
2052  					p.im = afterFramesetIM
2053  					return true
2054  				}
2055  			}
2056  		}
2057  	default:
2058  		// Ignore the token.
2059  	}
2060  	return true
2061  }
2062  
2063  // Section 12.2.6.4.21.
2064  func afterFramesetIM(p *parser) bool {
2065  	switch p.tok.Type {
2066  	case CommentToken:
2067  		p.addChild(&Node{
2068  			Type: CommentNode,
2069  			Data: p.tok.Data,
2070  		})
2071  	case TextToken:
2072  		// Ignore all text but whitespace.
2073  		s := strings.Map(func(c rune) rune {
2074  			switch c {
2075  			case ' ', '\t', '\n', '\f', '\r':
2076  				return c
2077  			}
2078  			return -1
2079  		}, p.tok.Data)
2080  		if s != "" {
2081  			p.addText(s)
2082  		}
2083  	case StartTagToken:
2084  		switch p.tok.DataAtom {
2085  		case a.Html:
2086  			return inBodyIM(p)
2087  		case a.Noframes:
2088  			return inHeadIM(p)
2089  		}
2090  	case EndTagToken:
2091  		switch p.tok.DataAtom {
2092  		case a.Html:
2093  			p.im = afterAfterFramesetIM
2094  			return true
2095  		}
2096  	default:
2097  		// Ignore the token.
2098  	}
2099  	return true
2100  }
2101  
2102  // Section 12.2.6.4.22.
2103  func afterAfterBodyIM(p *parser) bool {
2104  	switch p.tok.Type {
2105  	case ErrorToken:
2106  		// Stop parsing.
2107  		return true
2108  	case TextToken:
2109  		s := strings.TrimLeft(p.tok.Data, whitespace)
2110  		if len(s) == 0 {
2111  			// It was all whitespace.
2112  			return inBodyIM(p)
2113  		}
2114  	case StartTagToken:
2115  		if p.tok.DataAtom == a.Html {
2116  			return inBodyIM(p)
2117  		}
2118  	case CommentToken:
2119  		p.doc.AppendChild(&Node{
2120  			Type: CommentNode,
2121  			Data: p.tok.Data,
2122  		})
2123  		return true
2124  	case DoctypeToken:
2125  		return inBodyIM(p)
2126  	}
2127  	p.im = inBodyIM
2128  	return false
2129  }
2130  
2131  // Section 12.2.6.4.23.
2132  func afterAfterFramesetIM(p *parser) bool {
2133  	switch p.tok.Type {
2134  	case CommentToken:
2135  		p.doc.AppendChild(&Node{
2136  			Type: CommentNode,
2137  			Data: p.tok.Data,
2138  		})
2139  	case TextToken:
2140  		// Ignore all text but whitespace.
2141  		s := strings.Map(func(c rune) rune {
2142  			switch c {
2143  			case ' ', '\t', '\n', '\f', '\r':
2144  				return c
2145  			}
2146  			return -1
2147  		}, p.tok.Data)
2148  		if s != "" {
2149  			p.tok.Data = s
2150  			return inBodyIM(p)
2151  		}
2152  	case StartTagToken:
2153  		switch p.tok.DataAtom {
2154  		case a.Html:
2155  			return inBodyIM(p)
2156  		case a.Noframes:
2157  			return inHeadIM(p)
2158  		}
2159  	case DoctypeToken:
2160  		return inBodyIM(p)
2161  	default:
2162  		// Ignore the token.
2163  	}
2164  	return true
2165  }
2166  
2167  func ignoreTheRemainingTokens(p *parser) bool {
2168  	return true
2169  }
2170  
2171  const whitespaceOrNUL = whitespace + "\x00"
2172  
2173  // Section 12.2.6.5
2174  func parseForeignContent(p *parser) bool {
2175  	switch p.tok.Type {
2176  	case TextToken:
2177  		if p.framesetOK {
2178  			p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
2179  		}
2180  		p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
2181  		p.addText(p.tok.Data)
2182  	case CommentToken:
2183  		p.addChild(&Node{
2184  			Type: CommentNode,
2185  			Data: p.tok.Data,
2186  		})
2187  	case StartTagToken:
2188  		if !p.fragment {
2189  			b := breakout[p.tok.Data]
2190  			if p.tok.DataAtom == a.Font {
2191  			loop:
2192  				for _, attr := range p.tok.Attr {
2193  					switch attr.Key {
2194  					case "color", "face", "size":
2195  						b = true
2196  						break loop
2197  					}
2198  				}
2199  			}
2200  			if b {
2201  				for i := len(p.oe) - 1; i >= 0; i-- {
2202  					n := p.oe[i]
2203  					if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
2204  						p.oe = p.oe[:i+1]
2205  						break
2206  					}
2207  				}
2208  				return false
2209  			}
2210  		}
2211  		current := p.adjustedCurrentNode()
2212  		switch current.Namespace {
2213  		case "math":
2214  			adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
2215  		case "svg":
2216  			// Adjust SVG tag names. The tokenizer lower-cases tag names, but
2217  			// SVG wants e.g. "foreignObject" with a capital second "O".
2218  			if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
2219  				p.tok.DataAtom = a.Lookup([]byte(x))
2220  				p.tok.Data = x
2221  			}
2222  			adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
2223  		default:
2224  			panic("html: bad parser state: unexpected namespace")
2225  		}
2226  		adjustForeignAttributes(p.tok.Attr)
2227  		namespace := current.Namespace
2228  		p.addElement()
2229  		p.top().Namespace = namespace
2230  		if namespace != "" {
2231  			// Don't let the tokenizer go into raw text mode in foreign content
2232  			// (e.g. in an SVG <title> tag).
2233  			p.tokenizer.NextIsNotRawText()
2234  		}
2235  		if p.hasSelfClosingToken {
2236  			p.oe.pop()
2237  			p.acknowledgeSelfClosingTag()
2238  		}
2239  	case EndTagToken:
2240  		if strings.EqualFold(p.oe[len(p.oe)-1].Data, p.tok.Data) {
2241  			p.oe = p.oe[:len(p.oe)-1]
2242  			return true
2243  		}
2244  		for i := len(p.oe) - 1; i >= 0; i-- {
2245  			if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
2246  				p.oe = p.oe[:i]
2247  				return true
2248  			}
2249  			if i > 0 && p.oe[i-1].Namespace == "" {
2250  				break
2251  			}
2252  		}
2253  		return p.im(p)
2254  	default:
2255  		// Ignore the token.
2256  	}
2257  	return true
2258  }
2259  
2260  // Section 12.2.4.2.
2261  func (p *parser) adjustedCurrentNode() *Node {
2262  	if len(p.oe) == 1 && p.fragment && p.context != nil {
2263  		return p.context
2264  	}
2265  	return p.oe.top()
2266  }
2267  
2268  // Section 12.2.6.
2269  func (p *parser) inForeignContent() bool {
2270  	if len(p.oe) == 0 {
2271  		return false
2272  	}
2273  	n := p.adjustedCurrentNode()
2274  	if n.Namespace == "" {
2275  		return false
2276  	}
2277  	if mathMLTextIntegrationPoint(n) {
2278  		if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
2279  			return false
2280  		}
2281  		if p.tok.Type == TextToken {
2282  			return false
2283  		}
2284  	}
2285  	if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
2286  		return false
2287  	}
2288  	if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
2289  		return false
2290  	}
2291  	if p.tok.Type == ErrorToken {
2292  		return false
2293  	}
2294  	return true
2295  }
2296  
2297  // parseImpliedToken parses a token as though it had appeared in the parser's
2298  // input.
2299  func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
2300  	realToken, selfClosing := p.tok, p.hasSelfClosingToken
2301  	p.tok = Token{
2302  		Type:     t,
2303  		DataAtom: dataAtom,
2304  		Data:     data,
2305  	}
2306  	p.hasSelfClosingToken = false
2307  	p.parseCurrentToken()
2308  	p.tok, p.hasSelfClosingToken = realToken, selfClosing
2309  }
2310  
2311  // parseCurrentToken runs the current token through the parsing routines
2312  // until it is consumed.
2313  func (p *parser) parseCurrentToken() {
2314  	if p.tok.Type == SelfClosingTagToken {
2315  		p.hasSelfClosingToken = true
2316  		p.tok.Type = StartTagToken
2317  	}
2318  
2319  	consumed := false
2320  	for !consumed {
2321  		if p.inForeignContent() {
2322  			consumed = parseForeignContent(p)
2323  		} else {
2324  			consumed = p.im(p)
2325  		}
2326  	}
2327  
2328  	if p.hasSelfClosingToken {
2329  		// This is a parse error, but ignore it.
2330  		p.hasSelfClosingToken = false
2331  	}
2332  }
2333  
2334  func (p *parser) parse() (err error) {
2335  	defer func() {
2336  		if panicErr := recover(); panicErr != nil {
2337  			err = fmt.Errorf("%s", panicErr)
2338  		}
2339  	}()
2340  	// Iterate until EOF. Any other error will cause an early return.
2341  	for err != io.EOF {
2342  		// CDATA sections are allowed only in foreign content.
2343  		n := p.oe.top()
2344  		p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2345  		// Read and parse the next token.
2346  		p.tokenizer.Next()
2347  		p.tok = p.tokenizer.Token()
2348  		if p.tok.Type == ErrorToken {
2349  			err = p.tokenizer.Err()
2350  			if err != nil && err != io.EOF {
2351  				return err
2352  			}
2353  		}
2354  		p.parseCurrentToken()
2355  	}
2356  	return nil
2357  }
2358  
2359  // Parse returns the parse tree for the HTML from the given Reader.
2360  //
2361  // It implements the HTML5 parsing algorithm
2362  // (https://html.spec.whatwg.org/multipage/syntax.html#tree-construction),
2363  // which is very complicated. The resultant tree can contain implicitly created
2364  // nodes that have no explicit <tag> listed in r's data, and nodes' parents can
2365  // differ from the nesting implied by a naive processing of start and end
2366  // <tag>s. Conversely, explicit <tag>s in r's data can be silently dropped,
2367  // with no corresponding node in the resulting tree.
2368  //
2369  // Parse will reject HTML that is nested deeper than 512 elements.
2370  //
2371  // The input is assumed to be UTF-8 encoded.
2372  func Parse(r io.Reader) (*Node, error) {
2373  	return ParseWithOptions(r)
2374  }
2375  
2376  // ParseFragment parses a fragment of HTML and returns the nodes that were
2377  // found. If the fragment is the InnerHTML for an existing element, pass that
2378  // element in context.
2379  //
2380  // It has the same intricacies as Parse.
2381  func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2382  	return ParseFragmentWithOptions(r, context)
2383  }
2384  
2385  // ParseOption configures a parser.
2386  type ParseOption func(p *parser)
2387  
2388  // ParseOptionEnableScripting configures the scripting flag.
2389  // https://html.spec.whatwg.org/multipage/webappapis.html#enabling-and-disabling-scripting
2390  //
2391  // By default, scripting is enabled.
2392  func ParseOptionEnableScripting(enable bool) ParseOption {
2393  	return func(p *parser) {
2394  		p.scripting = enable
2395  	}
2396  }
2397  
2398  // ParseWithOptions is like Parse, with options.
2399  func ParseWithOptions(r io.Reader, opts ...ParseOption) (*Node, error) {
2400  	p := &parser{
2401  		tokenizer: NewTokenizer(r),
2402  		doc: &Node{
2403  			Type: DocumentNode,
2404  		},
2405  		scripting:  true,
2406  		framesetOK: true,
2407  		im:         initialIM,
2408  	}
2409  
2410  	for _, f := range opts {
2411  		f(p)
2412  	}
2413  
2414  	if err := p.parse(); err != nil {
2415  		return nil, err
2416  	}
2417  	return p.doc, nil
2418  }
2419  
2420  // ParseFragmentWithOptions is like ParseFragment, with options.
2421  func ParseFragmentWithOptions(r io.Reader, context *Node, opts ...ParseOption) ([]*Node, error) {
2422  	contextTag := ""
2423  	if context != nil {
2424  		if context.Type != ElementNode {
2425  			return nil, errors.New("html: ParseFragment of non-element Node")
2426  		}
2427  		// The next check isn't just context.DataAtom.String() == context.Data because
2428  		// it is valid to pass an element whose tag isn't a known atom. For example,
2429  		// DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2430  		if context.DataAtom != a.Lookup([]byte(context.Data)) {
2431  			return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2432  		}
2433  		contextTag = context.DataAtom.String()
2434  	}
2435  	p := &parser{
2436  		doc: &Node{
2437  			Type: DocumentNode,
2438  		},
2439  		scripting: true,
2440  		fragment:  true,
2441  		context:   context,
2442  	}
2443  	if context != nil && context.Namespace != "" {
2444  		p.tokenizer = NewTokenizer(r)
2445  	} else {
2446  		p.tokenizer = NewTokenizerFragment(r, contextTag)
2447  	}
2448  
2449  	for _, f := range opts {
2450  		f(p)
2451  	}
2452  
2453  	root := &Node{
2454  		Type:     ElementNode,
2455  		DataAtom: a.Html,
2456  		Data:     a.Html.String(),
2457  	}
2458  	p.doc.AppendChild(root)
2459  	p.oe = nodeStack{root}
2460  	if context != nil && context.DataAtom == a.Template {
2461  		p.templateStack = append(p.templateStack, inTemplateIM)
2462  	}
2463  	p.resetInsertionMode()
2464  
2465  	for n := context; n != nil; n = n.Parent {
2466  		if n.Type == ElementNode && n.DataAtom == a.Form {
2467  			p.form = n
2468  			break
2469  		}
2470  	}
2471  
2472  	if err := p.parse(); err != nil {
2473  		return nil, err
2474  	}
2475  
2476  	parent := p.doc
2477  	if context != nil {
2478  		parent = root
2479  	}
2480  
2481  	var result []*Node
2482  	for c := parent.FirstChild; c != nil; {
2483  		next := c.NextSibling
2484  		parent.RemoveChild(c)
2485  		result = append(result, c)
2486  		c = next
2487  	}
2488  	return result, nil
2489  }
2490