1 // Copyright 2013 The LevelDB-Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 5 package y
6 7 import "math"
8 9 // Filter is an encoded set of []byte keys.
10 type Filter []byte
11 12 func (f Filter) MayContainKey(k []byte) bool {
13 return f.MayContain(Hash(k))
14 }
15 16 // MayContain returns whether the filter may contain given key. False positives
17 // are possible, where it returns true for keys not in the original set.
18 func (f Filter) MayContain(h uint32) bool {
19 if len(f) < 2 {
20 return false
21 }
22 k := f[len(f)-1]
23 if k > 30 {
24 // This is reserved for potentially new encodings for short Bloom filters.
25 // Consider it a match.
26 return true
27 }
28 nBits := uint32(8 * (len(f) - 1))
29 delta := h>>17 | h<<15
30 for j := uint8(0); j < k; j++ {
31 bitPos := h % nBits
32 if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
33 return false
34 }
35 h += delta
36 }
37 return true
38 }
39 40 // NewFilter returns a new Bloom filter that encodes a set of []byte keys with
41 // the given number of bits per key, approximately.
42 //
43 // A good bitsPerKey value is 10, which yields a filter with ~ 1% false
44 // positive rate.
45 func NewFilter(keys []uint32, bitsPerKey int) Filter {
46 return Filter(appendFilter(nil, keys, bitsPerKey))
47 }
48 49 // BloomBitsPerKey returns the bits per key required by bloomfilter based on
50 // the false positive rate.
51 func BloomBitsPerKey(numEntries int, fp float64) int {
52 size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2)
53 locs := math.Ceil(float64(0.69314718056) * size / float64(numEntries))
54 return int(locs)
55 }
56 57 func appendFilter(buf []byte, keys []uint32, bitsPerKey int) []byte {
58 if bitsPerKey < 0 {
59 bitsPerKey = 0
60 }
61 // 0.69 is approximately ln(2).
62 k := uint32(float64(bitsPerKey) * 0.69)
63 if k < 1 {
64 k = 1
65 }
66 if k > 30 {
67 k = 30
68 }
69 70 nBits := len(keys) * bitsPerKey
71 // For small len(keys), we can see a very high false positive rate. Fix it
72 // by enforcing a minimum bloom filter length.
73 if nBits < 64 {
74 nBits = 64
75 }
76 nBytes := (nBits + 7) / 8
77 nBits = nBytes * 8
78 buf, filter := extend(buf, nBytes+1)
79 80 for _, h := range keys {
81 delta := h>>17 | h<<15
82 for j := uint32(0); j < k; j++ {
83 bitPos := h % uint32(nBits)
84 filter[bitPos/8] |= 1 << (bitPos % 8)
85 h += delta
86 }
87 }
88 filter[nBytes] = uint8(k)
89 90 return buf
91 }
92 93 // extend appends n zero bytes to b. It returns the overall slice (of length
94 // n+len(originalB)) and the slice of n trailing zeroes.
95 func extend(b []byte, n int) (overall, trailer []byte) {
96 want := n + len(b)
97 if want <= cap(b) {
98 overall = b[:want]
99 trailer = overall[len(b):]
100 for i := range trailer {
101 trailer[i] = 0
102 }
103 } else {
104 // Grow the capacity exponentially, with a 1KiB minimum.
105 c := 1024
106 for c < want {
107 c += c / 4
108 }
109 overall = make([]byte, want, c)
110 trailer = overall[len(b):]
111 copy(overall, b)
112 }
113 return overall, trailer
114 }
115 116 // hash implements a hashing algorithm similar to the Murmur hash.
117 func Hash(b []byte) uint32 {
118 const (
119 seed = 0xbc9f1d34
120 m = 0xc6a4a793
121 )
122 h := uint32(seed) ^ uint32(len(b))*m
123 for ; len(b) >= 4; b = b[4:] {
124 h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
125 h *= m
126 h ^= h >> 16
127 }
128 switch len(b) {
129 case 3:
130 h += uint32(b[2]) << 16
131 fallthrough
132 case 2:
133 h += uint32(b[1]) << 8
134 fallthrough
135 case 1:
136 h += uint32(b[0])
137 h *= m
138 h ^= h >> 24
139 }
140 return h
141 }
142 143 // FilterPolicy implements the db.FilterPolicy interface from the leveldb/db
144 // package.
145 //
146 // The integer value is the approximate number of bits used per key. A good
147 // value is 10, which yields a filter with ~ 1% false positive rate.
148 //
149 // It is valid to use the other API in this package (leveldb/bloom) without
150 // using this type or the leveldb/db package.
151 152 // type FilterPolicy int
153 154 // // Name implements the db.FilterPolicy interface.
155 // func (p FilterPolicy) Name() string {
156 // // This string looks arbitrary, but its value is written to LevelDB .ldb
157 // // files, and should be this exact value to be compatible with those files
158 // // and with the C++ LevelDB code.
159 // return "leveldb.BuiltinBloomFilter2"
160 // }
161 162 // // AppendFilter implements the db.FilterPolicy interface.
163 // func (p FilterPolicy) AppendFilter(dst []byte, keys [][]byte) []byte {
164 // return appendFilter(dst, keys, int(p))
165 // }
166 167 // // MayContain implements the db.FilterPolicy interface.
168 // func (p FilterPolicy) MayContain(filter, key []byte) bool {
169 // return Filter(filter).MayContain(key)
170 // }
171