hll.go raw
1 package hyperloglog
2
3 import (
4 "encoding/binary"
5 "encoding/hex"
6 "fmt"
7 )
8
9 // Everything is hardcoded to use precision 8, i.e. 256 registers.
10 type HyperLogLog struct {
11 offset int
12 registers []uint8
13 }
14
15 func New(offset int) *HyperLogLog {
16 if offset < 0 || offset > 32-8 {
17 panic(fmt.Errorf("invalid offset %d", offset))
18 }
19
20 // precision is always 8
21 // the number of registers is always 256 (1<<8)
22 hll := &HyperLogLog{offset: offset}
23 hll.registers = make([]uint8, 256)
24 return hll
25 }
26
27 func NewWithRegisters(registers []byte, offset int) *HyperLogLog {
28 if offset < 0 || offset > 32-8 {
29 panic(fmt.Errorf("invalid offset %d", offset))
30 }
31 if len(registers) != 256 {
32 panic(fmt.Errorf("invalid number of registers %d", len(registers)))
33 }
34 return &HyperLogLog{registers: registers, offset: offset}
35 }
36
37 func (hll *HyperLogLog) GetRegisters() []byte { return hll.registers }
38 func (hll *HyperLogLog) SetRegisters(enc []byte) { hll.registers = enc }
39 func (hll *HyperLogLog) MergeRegisters(other []byte) {
40 for i, v := range other {
41 if v > hll.registers[i] {
42 hll.registers[i] = v
43 }
44 }
45 }
46
47 func (hll *HyperLogLog) Clear() {
48 for i := range hll.registers {
49 hll.registers[i] = 0
50 }
51 }
52
53 // Add takes a Nostr event pubkey which will be used as the item "key" (that combined with the offset)
54 func (hll *HyperLogLog) Add(pubkey string) {
55 x, _ := hex.DecodeString(pubkey[hll.offset*2 : hll.offset*2+8*2])
56 j := x[0] // register address (first 8 bits, i.e. first byte)
57
58 w := binary.BigEndian.Uint64(x) // number that we will use
59 zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
60
61 if zeroBits > hll.registers[j] {
62 hll.registers[j] = zeroBits
63 }
64 }
65
66 // AddBytes is like Add, but takes pubkey as bytes instead of as string
67 func (hll *HyperLogLog) AddBytes(pubkey []byte) {
68 x := pubkey[hll.offset : hll.offset+8]
69 j := x[0] // register address (first 8 bits, i.e. first byte)
70
71 w := binary.BigEndian.Uint64(x) // number that we will use
72 zeroBits := clz56(w) + 1 // count zeroes (skip the first byte, so only use 56 bits)
73
74 if zeroBits > hll.registers[j] {
75 hll.registers[j] = zeroBits
76 }
77 }
78
79 func (hll *HyperLogLog) Merge(other *HyperLogLog) {
80 for i, v := range other.registers {
81 if v > hll.registers[i] {
82 hll.registers[i] = v
83 }
84 }
85 }
86
87 func (hll *HyperLogLog) Count() uint64 {
88 v := countZeros(hll.registers)
89
90 if v != 0 {
91 lc := linearCounting(256 /* nregisters */, v)
92
93 if lc <= 220 /* threshold */ {
94 return uint64(lc)
95 }
96 }
97
98 est := hll.calculateEstimate()
99 if est <= 256 /* nregisters */ *3 {
100 if v != 0 {
101 return uint64(linearCounting(256 /* nregisters */, v))
102 }
103 }
104
105 return uint64(est)
106 }
107
108 func (hll HyperLogLog) calculateEstimate() float64 {
109 sum := 0.0
110 for _, val := range hll.registers {
111 sum += 1.0 / float64(uint64(1)<<val) // this is the same as 2^(-val)
112 }
113
114 return 0.7182725932495458 /* alpha for 256 registers */ * 256 /* nregisters */ * 256 /* nregisters */ / sum
115 }
116