bbloom.go raw
1 // The MIT License (MIT)
2 // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
3
4 // Permission is hereby granted, free of charge, to any person obtaining a copy of
5 // this software and associated documentation files (the "Software"), to deal in
6 // the Software without restriction, including without limitation the rights to
7 // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
8 // the Software, and to permit persons to whom the Software is furnished to do so,
9 // subject to the following conditions:
10
11 // The above copyright notice and this permission notice shall be included in all
12 // copies or substantial portions of the Software.
13
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
16 // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
17 // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
18 // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20
21 package z
22
23 import (
24 "bytes"
25 "encoding/json"
26 "log"
27 "math"
28 "unsafe"
29 )
30
31 // helper
32 var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
33
34 func getSize(ui64 uint64) (size uint64, exponent uint64) {
35 if ui64 < uint64(512) {
36 ui64 = uint64(512)
37 }
38 size = uint64(1)
39 for size < ui64 {
40 size <<= 1
41 exponent++
42 }
43 return size, exponent
44 }
45
46 func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
47 size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
48 locs := math.Ceil(float64(0.69314718056) * size / numEntries)
49 return uint64(size), uint64(locs)
50 }
51
52 // NewBloomFilter returns a new bloomfilter.
53 func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
54 var entries, locs uint64
55 if len(params) == 2 {
56 if params[1] < 1 {
57 entries, locs = calcSizeByWrongPositives(params[0], params[1])
58 } else {
59 entries, locs = uint64(params[0]), uint64(params[1])
60 }
61 } else {
62 log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
63 " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
64 " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
65 }
66 size, exponent := getSize(entries)
67 bloomfilter = &Bloom{
68 sizeExp: exponent,
69 size: size - 1,
70 setLocs: locs,
71 shift: 64 - exponent,
72 }
73 bloomfilter.Size(size)
74 return bloomfilter
75 }
76
77 // Bloom filter
78 type Bloom struct {
79 bitset []uint64
80 ElemNum uint64
81 sizeExp uint64
82 size uint64
83 setLocs uint64
84 shift uint64
85 }
86
87 // <--- http://www.cse.yorku.ca/~oz/hash.html
88 // modified Berkeley DB Hash (32bit)
89 // hash is casted to l, h = 16bit fragments
90 // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
91 // hash := uint64(len(*b))
92 // for _, c := range *b {
93 // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
94 // }
95 // h = hash >> bl.shift
96 // l = hash << bl.shift >> bl.shift
97 // return l, h
98 // }
99
100 // Add adds hash of a key to the bloomfilter.
101 func (bl *Bloom) Add(hash uint64) {
102 h := hash >> bl.shift
103 l := hash << bl.shift >> bl.shift
104 for i := uint64(0); i < bl.setLocs; i++ {
105 bl.Set((h + i*l) & bl.size)
106 bl.ElemNum++
107 }
108 }
109
110 // Has checks if bit(s) for entry hash is/are set,
111 // returns true if the hash was added to the Bloom Filter.
112 func (bl Bloom) Has(hash uint64) bool {
113 h := hash >> bl.shift
114 l := hash << bl.shift >> bl.shift
115 for i := uint64(0); i < bl.setLocs; i++ {
116 if !bl.IsSet((h + i*l) & bl.size) {
117 return false
118 }
119 }
120 return true
121 }
122
123 // AddIfNotHas only Adds hash, if it's not present in the bloomfilter.
124 // Returns true if hash was added.
125 // Returns false if hash was already registered in the bloomfilter.
126 func (bl *Bloom) AddIfNotHas(hash uint64) bool {
127 if bl.Has(hash) {
128 return false
129 }
130 bl.Add(hash)
131 return true
132 }
133
134 // TotalSize returns the total size of the bloom filter.
135 func (bl *Bloom) TotalSize() int {
136 // The bl struct has 5 members and each one is 8 byte. The bitset is a
137 // uint64 byte slice.
138 return len(bl.bitset)*8 + 5*8
139 }
140
141 // Size makes Bloom filter with as bitset of size sz.
142 func (bl *Bloom) Size(sz uint64) {
143 bl.bitset = make([]uint64, sz>>6)
144 }
145
146 // Clear resets the Bloom filter.
147 func (bl *Bloom) Clear() {
148 for i := range bl.bitset {
149 bl.bitset[i] = 0
150 }
151 }
152
153 // Set sets the bit[idx] of bitset.
154 func (bl *Bloom) Set(idx uint64) {
155 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
156 *(*uint8)(ptr) |= mask[idx%8]
157 }
158
159 // IsSet checks if bit[idx] of bitset is set, returns true/false.
160 func (bl *Bloom) IsSet(idx uint64) bool {
161 ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
162 r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
163 return r == 1
164 }
165
166 // bloomJSONImExport
167 // Im/Export structure used by JSONMarshal / JSONUnmarshal
168 type bloomJSONImExport struct {
169 FilterSet []byte
170 SetLocs uint64
171 }
172
173 // NewWithBoolset takes a []byte slice and number of locs per entry,
174 // returns the bloomfilter with a bitset populated according to the input []byte.
175 func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
176 bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
177 for i, b := range *bs {
178 *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
179 }
180 return bloomfilter
181 }
182
183 // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
184 // returns bloom32 / bloom64 object.
185 func JSONUnmarshal(dbData []byte) (*Bloom, error) {
186 bloomImEx := bloomJSONImExport{}
187 if err := json.Unmarshal(dbData, &bloomImEx); err != nil {
188 return nil, err
189 }
190 buf := bytes.NewBuffer(bloomImEx.FilterSet)
191 bs := buf.Bytes()
192 bf := newWithBoolset(&bs, bloomImEx.SetLocs)
193 return bf, nil
194 }
195
196 // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
197 func (bl Bloom) JSONMarshal() []byte {
198 bloomImEx := bloomJSONImExport{}
199 bloomImEx.SetLocs = bl.setLocs
200 bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
201 for i := range bloomImEx.FilterSet {
202 bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
203 uintptr(i)))
204 }
205 data, err := json.Marshal(bloomImEx)
206 if err != nil {
207 log.Fatal("json.Marshal failed: ", err)
208 }
209 return data
210 }
211