xxhash.go raw

   1  // Package xxhash implements the 64-bit variant of xxHash (XXH64) as described
   2  // at http://cyan4973.github.io/xxHash/.
   3  package xxhash
   4  
   5  import (
   6  	"encoding/binary"
   7  	"errors"
   8  	"math/bits"
   9  )
  10  
  11  const (
  12  	prime1 uint64 = 11400714785074694791
  13  	prime2 uint64 = 14029467366897019727
  14  	prime3 uint64 = 1609587929392839161
  15  	prime4 uint64 = 9650029242287828579
  16  	prime5 uint64 = 2870177450012600261
  17  )
  18  
  19  // Store the primes in an array as well.
  20  //
  21  // The consts are used when possible in Go code to avoid MOVs but we need a
  22  // contiguous array for the assembly code.
  23  var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
  24  
  25  // Digest implements hash.Hash64.
  26  //
  27  // Note that a zero-valued Digest is not ready to receive writes.
  28  // Call Reset or create a Digest using New before calling other methods.
  29  type Digest struct {
  30  	v1    uint64
  31  	v2    uint64
  32  	v3    uint64
  33  	v4    uint64
  34  	total uint64
  35  	mem   [32]byte
  36  	n     int // how much of mem is used
  37  }
  38  
  39  // New creates a new Digest with a zero seed.
  40  func New() *Digest {
  41  	return NewWithSeed(0)
  42  }
  43  
  44  // NewWithSeed creates a new Digest with the given seed.
  45  func NewWithSeed(seed uint64) *Digest {
  46  	var d Digest
  47  	d.ResetWithSeed(seed)
  48  	return &d
  49  }
  50  
  51  // Reset clears the Digest's state so that it can be reused.
  52  // It uses a seed value of zero.
  53  func (d *Digest) Reset() {
  54  	d.ResetWithSeed(0)
  55  }
  56  
  57  // ResetWithSeed clears the Digest's state so that it can be reused.
  58  // It uses the given seed to initialize the state.
  59  func (d *Digest) ResetWithSeed(seed uint64) {
  60  	d.v1 = seed + prime1 + prime2
  61  	d.v2 = seed + prime2
  62  	d.v3 = seed
  63  	d.v4 = seed - prime1
  64  	d.total = 0
  65  	d.n = 0
  66  }
  67  
  68  // Size always returns 8 bytes.
  69  func (d *Digest) Size() int { return 8 }
  70  
  71  // BlockSize always returns 32 bytes.
  72  func (d *Digest) BlockSize() int { return 32 }
  73  
  74  // Write adds more data to d. It always returns len(b), nil.
  75  func (d *Digest) Write(b []byte) (n int, err error) {
  76  	n = len(b)
  77  	d.total += uint64(n)
  78  
  79  	memleft := d.mem[d.n&(len(d.mem)-1):]
  80  
  81  	if d.n+n < 32 {
  82  		// This new data doesn't even fill the current block.
  83  		copy(memleft, b)
  84  		d.n += n
  85  		return
  86  	}
  87  
  88  	if d.n > 0 {
  89  		// Finish off the partial block.
  90  		c := copy(memleft, b)
  91  		d.v1 = round(d.v1, u64(d.mem[0:8]))
  92  		d.v2 = round(d.v2, u64(d.mem[8:16]))
  93  		d.v3 = round(d.v3, u64(d.mem[16:24]))
  94  		d.v4 = round(d.v4, u64(d.mem[24:32]))
  95  		b = b[c:]
  96  		d.n = 0
  97  	}
  98  
  99  	if len(b) >= 32 {
 100  		// One or more full blocks left.
 101  		nw := writeBlocks(d, b)
 102  		b = b[nw:]
 103  	}
 104  
 105  	// Store any remaining partial block.
 106  	copy(d.mem[:], b)
 107  	d.n = len(b)
 108  
 109  	return
 110  }
 111  
 112  // Sum appends the current hash to b and returns the resulting slice.
 113  func (d *Digest) Sum(b []byte) []byte {
 114  	s := d.Sum64()
 115  	return append(
 116  		b,
 117  		byte(s>>56),
 118  		byte(s>>48),
 119  		byte(s>>40),
 120  		byte(s>>32),
 121  		byte(s>>24),
 122  		byte(s>>16),
 123  		byte(s>>8),
 124  		byte(s),
 125  	)
 126  }
 127  
 128  // Sum64 returns the current hash.
 129  func (d *Digest) Sum64() uint64 {
 130  	var h uint64
 131  
 132  	if d.total >= 32 {
 133  		v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4
 134  		h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4)
 135  		h = mergeRound(h, v1)
 136  		h = mergeRound(h, v2)
 137  		h = mergeRound(h, v3)
 138  		h = mergeRound(h, v4)
 139  	} else {
 140  		h = d.v3 + prime5
 141  	}
 142  
 143  	h += d.total
 144  
 145  	b := d.mem[:d.n&(len(d.mem)-1)]
 146  	for ; len(b) >= 8; b = b[8:] {
 147  		k1 := round(0, u64(b[:8]))
 148  		h ^= k1
 149  		h = rol27(h)*prime1 + prime4
 150  	}
 151  	if len(b) >= 4 {
 152  		h ^= uint64(u32(b[:4])) * prime1
 153  		h = rol23(h)*prime2 + prime3
 154  		b = b[4:]
 155  	}
 156  	for ; len(b) > 0; b = b[1:] {
 157  		h ^= uint64(b[0]) * prime5
 158  		h = rol11(h) * prime1
 159  	}
 160  
 161  	h ^= h >> 33
 162  	h *= prime2
 163  	h ^= h >> 29
 164  	h *= prime3
 165  	h ^= h >> 32
 166  
 167  	return h
 168  }
 169  
 170  const (
 171  	magic         = "xxh\x06"
 172  	marshaledSize = len(magic) + 8*5 + 32
 173  )
 174  
 175  // MarshalBinary implements the encoding.BinaryMarshaler interface.
 176  func (d *Digest) MarshalBinary() ([]byte, error) {
 177  	b := make([]byte, 0, marshaledSize)
 178  	b = append(b, magic...)
 179  	b = appendUint64(b, d.v1)
 180  	b = appendUint64(b, d.v2)
 181  	b = appendUint64(b, d.v3)
 182  	b = appendUint64(b, d.v4)
 183  	b = appendUint64(b, d.total)
 184  	b = append(b, d.mem[:d.n]...)
 185  	b = b[:len(b)+len(d.mem)-d.n]
 186  	return b, nil
 187  }
 188  
 189  // UnmarshalBinary implements the encoding.BinaryUnmarshaler interface.
 190  func (d *Digest) UnmarshalBinary(b []byte) error {
 191  	if len(b) < len(magic) || string(b[:len(magic)]) != magic {
 192  		return errors.New("xxhash: invalid hash state identifier")
 193  	}
 194  	if len(b) != marshaledSize {
 195  		return errors.New("xxhash: invalid hash state size")
 196  	}
 197  	b = b[len(magic):]
 198  	b, d.v1 = consumeUint64(b)
 199  	b, d.v2 = consumeUint64(b)
 200  	b, d.v3 = consumeUint64(b)
 201  	b, d.v4 = consumeUint64(b)
 202  	b, d.total = consumeUint64(b)
 203  	copy(d.mem[:], b)
 204  	d.n = int(d.total % uint64(len(d.mem)))
 205  	return nil
 206  }
 207  
 208  func appendUint64(b []byte, x uint64) []byte {
 209  	var a [8]byte
 210  	binary.LittleEndian.PutUint64(a[:], x)
 211  	return append(b, a[:]...)
 212  }
 213  
 214  func consumeUint64(b []byte) ([]byte, uint64) {
 215  	x := u64(b)
 216  	return b[8:], x
 217  }
 218  
 219  func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) }
 220  func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) }
 221  
 222  func round(acc, input uint64) uint64 {
 223  	acc += input * prime2
 224  	acc = rol31(acc)
 225  	acc *= prime1
 226  	return acc
 227  }
 228  
 229  func mergeRound(acc, val uint64) uint64 {
 230  	val = round(0, val)
 231  	acc ^= val
 232  	acc = acc*prime1 + prime4
 233  	return acc
 234  }
 235  
 236  func rol1(x uint64) uint64  { return bits.RotateLeft64(x, 1) }
 237  func rol7(x uint64) uint64  { return bits.RotateLeft64(x, 7) }
 238  func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) }
 239  func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) }
 240  func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) }
 241  func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) }
 242  func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) }
 243  func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) }
 244