atom.go raw

   1  // Copyright 2012 The Go Authors. All rights reserved.
   2  // Use of this source code is governed by a BSD-style
   3  // license that can be found in the LICENSE file.
   4  
   5  // Package atom provides integer codes (also known as atoms) for a fixed set of
   6  // frequently occurring HTML strings: tag names and attribute keys such as "p"
   7  // and "id".
   8  //
   9  // Sharing an atom's name between all elements with the same tag can result in
  10  // fewer string allocations when tokenizing and parsing HTML. Integer
  11  // comparisons are also generally faster than string comparisons.
  12  //
  13  // The value of an atom's particular code is not guaranteed to stay the same
  14  // between versions of this package. Neither is any ordering guaranteed:
  15  // whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
  16  // be dense. The only guarantees are that e.g. looking up "div" will yield
  17  // atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
  18  package atom // import "golang.org/x/net/html/atom"
  19  
// Atom is an integer code for a string. The zero value maps to "".
//
// Implementation note: as decoded by the String method, the bits above the
// low byte hold the starting offset of the name within atomText, and the low
// 8 bits hold the name's length in bytes.
type Atom uint32
  22  
  23  // String returns the atom's name.
  24  func (a Atom) String() string {
  25  	start := uint32(a >> 8)
  26  	n := uint32(a & 0xff)
  27  	if start+n > uint32(len(atomText)) {
  28  		return ""
  29  	}
  30  	return atomText[start : start+n]
  31  }
  32  
  33  func (a Atom) string() string {
  34  	return atomText[a>>8 : a>>8+a&0xff]
  35  }
  36  
  37  // fnv computes the FNV hash with an arbitrary starting value h.
  38  func fnv(h uint32, s []byte) uint32 {
  39  	for i := range s {
  40  		h ^= uint32(s[i])
  41  		h *= 16777619
  42  	}
  43  	return h
  44  }
  45  
  46  func match(s string, t []byte) bool {
  47  	for i, c := range t {
  48  		if s[i] != c {
  49  			return false
  50  		}
  51  	}
  52  	return true
  53  }
  54  
  55  // Lookup returns the atom whose name is s. It returns zero if there is no
  56  // such atom. The lookup is case sensitive.
  57  func Lookup(s []byte) Atom {
  58  	if len(s) == 0 || len(s) > maxAtomLen {
  59  		return 0
  60  	}
  61  	h := fnv(hash0, s)
  62  	if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
  63  		return a
  64  	}
  65  	if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
  66  		return a
  67  	}
  68  	return 0
  69  }
  70  
  71  // String returns a string whose contents are equal to s. In that sense, it is
  72  // equivalent to string(s) but may be more efficient.
  73  func String(s []byte) string {
  74  	if a := Lookup(s); a != 0 {
  75  		return a.String()
  76  	}
  77  	return string(s)
  78  }
  79