Skip to content

Commit 05de11b

Browse files
authored
Merge pull request #4 from yourbasic/tip
Tip
2 parents c6ddac7 + fae88d1 commit 05de11b

4 files changed

Lines changed: 144 additions & 190 deletions

File tree

filter.go

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@
3232
// 1024 1.8 10
3333
//
3434
// This implementation is not intended for cryptographic use.
35-
// Each membership test makes a single call to a 128-bit MurmurHash3 function.
35+
// Each membership test makes a single call to a 128-bit hash function.
3636
// This improves speed without increasing the false-positive rate,
3737
// as shown by Kirsch and Mitzenmacher.
3838
//
@@ -54,12 +54,6 @@ type Filter struct {
5454
count int64 // Estimated number of elements
5555
}
5656

57-
// MurmurHash3 functions.
58-
var (
59-
murmur = new(digest)
60-
murmurString = new(digestString)
61-
)
62-
6357
// New creates an empty Bloom filter with room for n elements
6458
// at a false-positive rate less than 1/p.
6559
func New(n int, p int) *Filter {
@@ -76,12 +70,12 @@ func New(n int, p int) *Filter {
7670

7771
// AddByte adds b to the filter and tells if b was already a likely member.
7872
func (f *Filter) AddByte(b []byte) bool {
79-
return f.add(murmur.hash(b))
73+
return f.add(hash(b))
8074
}
8175

8276
// Add adds s to the filter and tells if s was already a likely member.
8377
func (f *Filter) Add(s string) bool {
84-
return f.add(murmurString.hash(s))
78+
return f.add(hashString(s))
8579
}
8680

8781
func (f *Filter) add(h1, h2 uint64) bool {
@@ -104,12 +98,12 @@ func (f *Filter) add(h1, h2 uint64) bool {
10498

10599
// TestByte tells if b is a likely member of the filter.
106100
func (f *Filter) TestByte(b []byte) bool {
107-
return f.test(murmur.hash(b))
101+
return f.test(hash(b))
108102
}
109103

110104
// Test tells if s is a likely member of the filter.
111105
func (f *Filter) Test(s string) bool {
112-
return f.test(murmurString.hash(s))
106+
return f.test(hashString(s))
113107
}
114108

115109
func (f *Filter) test(h1, h2 uint64) bool {

hash.go

Lines changed: 135 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -3,129 +3,208 @@ package bloom
33
// MurmurHash3 implementation adapted from Sébastien Paolacci
44
// github.com/spaolacci/murmur3, released under BSD-3-Clause.
55

6-
func (d *digest) hash(data []byte) (h1 uint64, h2 uint64) {
7-
d.h1, d.h2 = 0, 0
8-
d.clen = len(data)
9-
d.tail = d.bmix(data)
10-
return d.sum()
11-
}
12-
136
const (
147
c1 = 0x87c37b91114253d5
158
c2 = 0x4cf5ad432745937f
169
)
1710

18-
type digest struct {
19-
clen int
20-
tail []byte
21-
h1 uint64
22-
h2 uint64
11+
func fmix(k uint64) uint64 {
12+
k ^= k >> 33
13+
k *= 0xff51afd7ed558ccd
14+
k ^= k >> 33
15+
k *= 0xc4ceb9fe1a85ec53
16+
k ^= k >> 33
17+
return k
2318
}
2419

2520
func uint64byte(b []byte) uint64 {
2621
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
2722
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
2823
}
2924

30-
func (d *digest) bmix(p []byte) (tail []byte) {
31-
h1, h2 := d.h1, d.h2
32-
nblocks := len(p) / 16
25+
func uint64string(s string) uint64 {
26+
return uint64(s[0]) | uint64(s[1])<<8 | uint64(s[2])<<16 | uint64(s[3])<<24 |
27+
uint64(s[4])<<32 | uint64(s[5])<<40 | uint64(s[6])<<48 | uint64(s[7])<<56
28+
}
29+
30+
func hash(b []byte) (h1, h2 uint64) {
31+
nblocks := len(b) / 16
3332
for i := 0; i < nblocks; i++ {
3433
j := 16 * i
35-
k1 := uint64byte(p[j : j+8])
36-
k2 := uint64byte(p[j+8 : j+16])
34+
k1, k2 := uint64byte(b[j:j+8]), uint64byte(b[j+8:j+16])
35+
3736
k1 *= c1
38-
k1 = (k1 << 31) | (k1 >> 33)
37+
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
3938
k1 *= c2
39+
4040
h1 ^= k1
41-
h1 = (h1 << 27) | (h1 >> 37)
41+
h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
4242
h1 += h2
4343
h1 = h1*5 + 0x52dce729
44+
4445
k2 *= c2
45-
k2 = (k2 << 33) | (k2 >> 31)
46+
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
4647
k2 *= c1
48+
4749
h2 ^= k2
48-
h2 = (h2 << 31) | (h2 >> 33)
50+
h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
4951
h2 += h1
5052
h2 = h2*5 + 0x38495ab5
5153
}
52-
d.h1, d.h2 = h1, h2
53-
return p[nblocks*16:]
54-
}
5554

56-
func (d *digest) sum() (h1, h2 uint64) {
57-
h1, h2 = d.h1, d.h2
55+
tail := b[nblocks*16:]
5856
var k1, k2 uint64
59-
switch len(d.tail) & 15 {
57+
switch len(tail) {
6058
case 15:
61-
k2 ^= uint64(d.tail[14]) << 48
59+
k2 ^= uint64(tail[14]) << 48
6260
fallthrough
6361
case 14:
64-
k2 ^= uint64(d.tail[13]) << 40
62+
k2 ^= uint64(tail[13]) << 40
6563
fallthrough
6664
case 13:
67-
k2 ^= uint64(d.tail[12]) << 32
65+
k2 ^= uint64(tail[12]) << 32
6866
fallthrough
6967
case 12:
70-
k2 ^= uint64(d.tail[11]) << 24
68+
k2 ^= uint64(tail[11]) << 24
7169
fallthrough
7270
case 11:
73-
k2 ^= uint64(d.tail[10]) << 16
71+
k2 ^= uint64(tail[10]) << 16
7472
fallthrough
7573
case 10:
76-
k2 ^= uint64(d.tail[9]) << 8
74+
k2 ^= uint64(tail[9]) << 8
7775
fallthrough
7876
case 9:
79-
k2 ^= uint64(d.tail[8]) << 0
77+
k2 ^= uint64(tail[8]) << 0
8078
k2 *= c2
81-
k2 = (k2 << 33) | (k2 >> 31)
79+
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
8280
k2 *= c1
8381
h2 ^= k2
8482
fallthrough
8583
case 8:
86-
k1 ^= uint64(d.tail[7]) << 56
84+
k1 ^= uint64(tail[7]) << 56
8785
fallthrough
8886
case 7:
89-
k1 ^= uint64(d.tail[6]) << 48
87+
k1 ^= uint64(tail[6]) << 48
9088
fallthrough
9189
case 6:
92-
k1 ^= uint64(d.tail[5]) << 40
90+
k1 ^= uint64(tail[5]) << 40
9391
fallthrough
9492
case 5:
95-
k1 ^= uint64(d.tail[4]) << 32
93+
k1 ^= uint64(tail[4]) << 32
9694
fallthrough
9795
case 4:
98-
k1 ^= uint64(d.tail[3]) << 24
96+
k1 ^= uint64(tail[3]) << 24
9997
fallthrough
10098
case 3:
101-
k1 ^= uint64(d.tail[2]) << 16
99+
k1 ^= uint64(tail[2]) << 16
102100
fallthrough
103101
case 2:
104-
k1 ^= uint64(d.tail[1]) << 8
102+
k1 ^= uint64(tail[1]) << 8
105103
fallthrough
106104
case 1:
107-
k1 ^= uint64(d.tail[0]) << 0
105+
k1 ^= uint64(tail[0]) << 0
108106
k1 *= c1
109-
k1 = (k1 << 31) | (k1 >> 33)
107+
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
110108
k1 *= c2
111109
h1 ^= k1
112110
}
113-
h1 ^= uint64(d.clen)
114-
h2 ^= uint64(d.clen)
111+
h1 ^= uint64(len(b))
112+
h2 ^= uint64(len(b))
115113
h1 += h2
116114
h2 += h1
117-
h1 = fmix(h1)
118-
h2 = fmix(h2)
115+
h1, h2 = fmix(h1), fmix(h2)
119116
h1 += h2
120117
h2 += h1
121-
return h1, h2
118+
return
122119
}
123120

124-
func fmix(k uint64) uint64 {
125-
k ^= k >> 33
126-
k *= 0xff51afd7ed558ccd
127-
k ^= k >> 33
128-
k *= 0xc4ceb9fe1a85ec53
129-
k ^= k >> 33
130-
return k
121+
func hashString(s string) (h1, h2 uint64) {
122+
nblocks := len(s) / 16
123+
for i := 0; i < nblocks; i++ {
124+
j := 16 * i
125+
k1, k2 := uint64string(s[j:j+8]), uint64string(s[j+8:j+16])
126+
127+
k1 *= c1
128+
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
129+
k1 *= c2
130+
131+
h1 ^= k1
132+
h1 = (h1 << 27) | (h1 >> 37) // rotl64(h1, 27)
133+
h1 += h2
134+
h1 = h1*5 + 0x52dce729
135+
136+
k2 *= c2
137+
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
138+
k2 *= c1
139+
140+
h2 ^= k2
141+
h2 = (h2 << 31) | (h2 >> 33) // rotl64(h2, 31)
142+
h2 += h1
143+
h2 = h2*5 + 0x38495ab5
144+
}
145+
146+
tail := s[nblocks*16:]
147+
var k1, k2 uint64
148+
switch len(tail) {
149+
case 15:
150+
k2 ^= uint64(tail[14]) << 48
151+
fallthrough
152+
case 14:
153+
k2 ^= uint64(tail[13]) << 40
154+
fallthrough
155+
case 13:
156+
k2 ^= uint64(tail[12]) << 32
157+
fallthrough
158+
case 12:
159+
k2 ^= uint64(tail[11]) << 24
160+
fallthrough
161+
case 11:
162+
k2 ^= uint64(tail[10]) << 16
163+
fallthrough
164+
case 10:
165+
k2 ^= uint64(tail[9]) << 8
166+
fallthrough
167+
case 9:
168+
k2 ^= uint64(tail[8]) << 0
169+
k2 *= c2
170+
k2 = (k2 << 33) | (k2 >> 31) // rotl64(k2, 33)
171+
k2 *= c1
172+
h2 ^= k2
173+
fallthrough
174+
case 8:
175+
k1 ^= uint64(tail[7]) << 56
176+
fallthrough
177+
case 7:
178+
k1 ^= uint64(tail[6]) << 48
179+
fallthrough
180+
case 6:
181+
k1 ^= uint64(tail[5]) << 40
182+
fallthrough
183+
case 5:
184+
k1 ^= uint64(tail[4]) << 32
185+
fallthrough
186+
case 4:
187+
k1 ^= uint64(tail[3]) << 24
188+
fallthrough
189+
case 3:
190+
k1 ^= uint64(tail[2]) << 16
191+
fallthrough
192+
case 2:
193+
k1 ^= uint64(tail[1]) << 8
194+
fallthrough
195+
case 1:
196+
k1 ^= uint64(tail[0]) << 0
197+
k1 *= c1
198+
k1 = (k1 << 31) | (k1 >> 33) // rotl64(k1, 31)
199+
k1 *= c2
200+
h1 ^= k1
201+
}
202+
h1 ^= uint64(len(s))
203+
h2 ^= uint64(len(s))
204+
h1 += h2
205+
h2 += h1
206+
h1, h2 = fmix(h1), fmix(h2)
207+
h1 += h2
208+
h2 += h1
209+
return
131210
}

0 commit comments

Comments
 (0)