Skip to content

Commit 3d3d3d0

Browse files
authored
Merge pull request #2 from yourbasic/tip
Tip
2 parents a6906f1 + afcd687 commit 3d3d3d0

6 files changed

Lines changed: 185 additions & 34 deletions

File tree

README.md

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Your basic Bloom filter
22

3-
Golang Bloom filter implementation
3+
### Golang probabilistic set data structure
4+
5+
A Bloom filter is a fast and space-efficient probabilistic
6+
data structure used to test set membership. A membership test
7+
returns either “likely member” or “definitely not a member”.
48

59
![Neutral density filter](ND-filter.jpg)
610

@@ -23,12 +27,14 @@ There is an online reference for the package at
2327
* The API of this library is frozen.
2428
* Version numbers adhere to [semantic versioning][sv].
2529

26-
The only accepted reason to modify the API of this package is to handle
27-
bug fixes that can't be resolved in any other reasonable way.
30+
The only accepted reason to modify the API of this package
31+
is to handle issues that can't be resolved in any other
32+
reasonable way.
2833

2934
### Thanks
3035

31-
Thanks to [Sébastien Paolacci][sp] for his excellent MurmurHash implementation.
36+
Thanks to [Sébastien Paolacci][sp] for his excellent
37+
MurmurHash implementation.
3238

3339
Stefan Nilsson – [korthaj](https://github.com/korthaj)
3440

filter.go

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,11 @@ type Filter struct {
5454
count int64 // Estimate number of elements
5555
}
5656

57-
// MurmurHash3 function.
58-
var murmur = new(digest)
57+
// MurmurHash3 functions.
58+
var (
59+
murmur = new(digest)
60+
murmurString = new(digestString)
61+
)
5962

6063
// New creates an empty Bloom filter with room for n elements
6164
// with a false-positive rate less than 1/p.
@@ -73,7 +76,15 @@ func New(n int, p int) *Filter {
7376

7477
// AddByte adds b to the filter and tells if b was already a likely member.
7578
func (f *Filter) AddByte(b []byte) bool {
76-
h1, h2 := murmur.hash(b)
79+
return f.add(murmur.hash(b))
80+
}
81+
82+
// Add adds s to the filter and tells if s was already a likely member.
83+
func (f *Filter) Add(s string) bool {
84+
// The string is hashed directly, with no intermediate []byte copy.
// NOTE(review): murmurString is a single package-level digest whose
// fields are overwritten by hash, so concurrent calls (even on
// different filters) would race on it — confirm callers serialize.
return f.add(murmurString.hash(s))
85+
}
86+
87+
func (f *Filter) add(h1, h2 uint64) bool {
7788
trunc := uint64(len(f.data))<<shift - 1
7889
member := true
7990
for i := f.lookups; i > 0; i-- {
@@ -91,16 +102,17 @@ func (f *Filter) AddByte(b []byte) bool {
91102
return member
92103
}
93104

94-
// Add adds s to the filter and tells if s was already a likely member.
95-
func (f *Filter) Add(s string) bool {
96-
b := make([]byte, len(s))
97-
copy(b, s)
98-
return f.AddByte(b)
99-
}
100-
101105
// TestByte tells if b is a likely member of the filter.
102106
func (f *Filter) TestByte(b []byte) bool {
103-
h1, h2 := murmur.hash(b)
107+
return f.test(murmur.hash(b))
108+
}
109+
110+
// Test tells if s is a likely member of the filter.
111+
func (f *Filter) Test(s string) bool {
112+
// The string is hashed directly, with no intermediate []byte copy.
// NOTE(review): murmurString is a single package-level digest whose
// fields are overwritten by hash, so even read-only membership tests
// from several goroutines would race on it — confirm callers serialize.
return f.test(murmurString.hash(s))
113+
}
114+
115+
func (f *Filter) test(h1, h2 uint64) bool {
104116
trunc := uint64(len(f.data))<<shift - 1
105117
for i := f.lookups; i > 0; i-- {
106118
h1 += h2
@@ -113,13 +125,6 @@ func (f *Filter) TestByte(b []byte) bool {
113125
return true
114126
}
115127

116-
// Test tells if s is a likely member of the filter.
117-
func (f *Filter) Test(s string) bool {
118-
b := make([]byte, len(s))
119-
copy(b, s)
120-
return f.TestByte(b)
121-
}
122-
123128
// Count returns an estimate of the number of elements in the filter.
124129
func (f *Filter) Count() int64 {
125130
return f.count

filter_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ func BenchmarkTestByte(b *testing.B) {
126126
}
127127
}
128128

129-
func BenchmarkTestUnion(b *testing.B) {
129+
func BenchmarkUnion(b *testing.B) {
130130
n := 1000
131131
b.StopTimer()
132132
f1 := New(n, 200)

hash.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
package bloom
22

3-
import (
4-
"encoding/binary"
5-
)
6-
73
// MurmurHash3 implementation adapted from Sébastien Paolacci
84
// github.com/spaolacci/murmur3, released under BSD-3-Clause.
95

@@ -20,20 +16,24 @@ const (
2016
)
2117

2218
type digest struct {
23-
clen int // Digested input cumulative length.
24-
buf [16]byte // Expected (but not required) to be 16 large.
25-
tail []byte // 0 to 15 bytes view of buf.
26-
h1 uint64 // Running hash part 1.
27-
h2 uint64 // Running hash part 2.
19+
clen int
20+
tail []byte
21+
h1 uint64
22+
h2 uint64
23+
}
24+
25+
// Uint64 decodes the first eight bytes of b as a little-endian uint64
// (b[0] is the least significant byte). It panics if b holds fewer
// than eight bytes.
// NOTE(review): the name is exported, so it becomes part of the
// package's public API — confirm that is intended.
func Uint64(b []byte) uint64 {
26+
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
27+
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
2828
}
2929

3030
func (d *digest) bmix(p []byte) (tail []byte) {
3131
h1, h2 := d.h1, d.h2
3232
nblocks := len(p) / 16
3333
for i := 0; i < nblocks; i++ {
3434
j := 16 * i
35-
k1 := binary.LittleEndian.Uint64(p[j : j+8])
36-
k2 := binary.LittleEndian.Uint64(p[j+8 : j+16])
35+
k1 := Uint64(p[j : j+8])
36+
k2 := Uint64(p[j+8 : j+16])
3737
k1 *= c1
3838
k1 = (k1 << 31) | (k1 >> 33)
3939
k1 *= c2

hash_string.go

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
package bloom
2+
3+
// MurmurHash3 implementation adapted from Sébastien Paolacci
4+
// github.com/spaolacci/murmur3, released under BSD-3-Clause.
5+
6+
// hash returns the 128-bit hash of data as two 64-bit halves.
// It restarts from a zero state, mixes every full 16-byte block with
// bmixString, and finalizes the remaining tail and length with sum.
// The receiver's fields are overwritten on every call, so one
// digestString must not be used by concurrent callers.
func (d *digestString) hash(data string) (h1 uint64, h2 uint64) {
7+
d.h1, d.h2 = 0, 0
8+
d.clen = len(data)
9+
d.tail = d.bmixString(data)
10+
return d.sum()
11+
}
12+
13+
// digestString holds the running hash state used when the input is a
// string; its tail is kept as a string rather than a byte slice.
type digestString struct {
14+
clen int // Input length in bytes; set by hash and mixed in by sum.
15+
tail string // Bytes left over after the full 16-byte blocks.
16+
h1 uint64 // Running hash state, first half.
17+
h2 uint64 // Running hash state, second half.
18+
}
19+
20+
// Uint64String decodes the first eight bytes of b as a little-endian
// uint64 (b[0] is the least significant byte). It panics if b holds
// fewer than eight bytes.
// NOTE(review): the name is exported, so it becomes part of the
// package's public API — confirm that is intended.
func Uint64String(b string) uint64 {
21+
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
22+
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
23+
}
24+
25+
// bmixString mixes every complete 16-byte block of p into the running
// state (d.h1, d.h2) and returns the unprocessed remainder of p, which
// is between 0 and 15 bytes long. Each block is read as two
// little-endian 64-bit words, k1 and k2.
func (d *digestString) bmixString(p string) (tail string) {
26+
h1, h2 := d.h1, d.h2
27+
nblocks := len(p) / 16
28+
for i := 0; i < nblocks; i++ {
29+
j := 16 * i
30+
k1 := Uint64String(p[j : j+8])
31+
k2 := Uint64String(p[j+8 : j+16])
32+
k1 *= c1
33+
k1 = (k1 << 31) | (k1 >> 33) // rotate left by 31
34+
k1 *= c2
35+
h1 ^= k1
36+
h1 = (h1 << 27) | (h1 >> 37) // rotate left by 27
37+
h1 += h2
38+
h1 = h1*5 + 0x52dce729
39+
k2 *= c2
40+
k2 = (k2 << 33) | (k2 >> 31) // rotate left by 33
41+
k2 *= c1
42+
h2 ^= k2
43+
h2 = (h2 << 31) | (h2 >> 33) // rotate left by 31
44+
h2 += h1
45+
h2 = h2*5 + 0x38495ab5
46+
}
47+
d.h1, d.h2 = h1, h2
48+
// Everything past the last full block is left for sum to fold in.
return p[nblocks*16:]
49+
}
50+
51+
// sum finalizes the hash and returns its two 64-bit halves. It works
// on copies of d.h1 and d.h2 and does not modify d. The tail bytes are
// folded in through a fallthrough switch: entering at case len(tail)
// consumes tail[len-1] down to tail[0]. Bytes 8..14 build k2 and bytes
// 0..7 build k1, both in little-endian order, before the input length
// is mixed in and fmix is applied to each half.
func (d *digestString) sum() (h1, h2 uint64) {
52+
h1, h2 = d.h1, d.h2
53+
var k1, k2 uint64
54+
switch len(d.tail) & 15 {
55+
case 15:
56+
k2 ^= uint64(d.tail[14]) << 48
57+
fallthrough
58+
case 14:
59+
k2 ^= uint64(d.tail[13]) << 40
60+
fallthrough
61+
case 13:
62+
k2 ^= uint64(d.tail[12]) << 32
63+
fallthrough
64+
case 12:
65+
k2 ^= uint64(d.tail[11]) << 24
66+
fallthrough
67+
case 11:
68+
k2 ^= uint64(d.tail[10]) << 16
69+
fallthrough
70+
case 10:
71+
k2 ^= uint64(d.tail[9]) << 8
72+
fallthrough
73+
case 9:
74+
k2 ^= uint64(d.tail[8]) << 0
75+
k2 *= c2
76+
k2 = (k2 << 33) | (k2 >> 31) // rotate left by 33
77+
k2 *= c1
78+
h2 ^= k2
79+
fallthrough
80+
case 8:
81+
k1 ^= uint64(d.tail[7]) << 56
82+
fallthrough
83+
case 7:
84+
k1 ^= uint64(d.tail[6]) << 48
85+
fallthrough
86+
case 6:
87+
k1 ^= uint64(d.tail[5]) << 40
88+
fallthrough
89+
case 5:
90+
k1 ^= uint64(d.tail[4]) << 32
91+
fallthrough
92+
case 4:
93+
k1 ^= uint64(d.tail[3]) << 24
94+
fallthrough
95+
case 3:
96+
k1 ^= uint64(d.tail[2]) << 16
97+
fallthrough
98+
case 2:
99+
k1 ^= uint64(d.tail[1]) << 8
100+
fallthrough
101+
case 1:
102+
k1 ^= uint64(d.tail[0]) << 0
103+
k1 *= c1
104+
k1 = (k1 << 31) | (k1 >> 33) // rotate left by 31
105+
k1 *= c2
106+
h1 ^= k1
107+
}
108+
// Mix the total input length into both halves before the final avalanche.
h1 ^= uint64(d.clen)
109+
h2 ^= uint64(d.clen)
110+
h1 += h2
111+
h2 += h1
112+
h1 = fmix(h1)
113+
h2 = fmix(h2)
114+
h1 += h2
115+
h2 += h1
116+
return h1, h2
117+
}

hash_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,26 @@ func TestHash(t *testing.T) {
2626
}
2727
}
2828
}
29+
30+
// TestHashString checks digestString.hash against fixed expected
// values for inputs of several lengths, including the empty string.
// A single digest is reused for every case, so the test also relies
// on hash resetting its state at the start of each call.
// NOTE(review): failures are printed with %d while the table is
// written in hex; %#x would make mismatches easier to compare.
func TestHashString(t *testing.T) {
31+
d := new(digestString)
32+
var data = []struct {
33+
h1, h2 uint64
34+
s string
35+
}{
36+
{0x0000000000000000, 0x0000000000000000, ""},
37+
{0xcbd8a7b341bd9b02, 0x5b1e906a48ae1d19, "hello"},
38+
{0x342fac623a5ebc8e, 0x4cdcbc079642414d, "hello, world"},
39+
{0xb89e5988b737affc, 0x664fc2950231b2cb, "19 Jan 2038 at 3:14:07 AM"},
40+
{0xcd99481f9ee902c9, 0x695da1a38987b6e7, "The quick brown fox jumps over the lazy dog."},
41+
}
42+
for _, x := range data {
43+
h1, h2 := d.hash(x.s)
44+
if h1 != x.h1 {
45+
t.Errorf("hash(%q).h1 = %d; want %d\n", x.s, h1, x.h1)
46+
}
47+
if h2 != x.h2 {
48+
t.Errorf("hash(%q).h2 = %d; want %d\n", x.s, h2, x.h2)
49+
}
50+
}
51+
}

0 commit comments

Comments
 (0)