Skip to content

Commit 04b84be

Browse files
committed
Fix sim_hashes_search filtering and add unit test
1 parent 10591b0 commit 04b84be

1 file changed

Lines changed: 28 additions & 3 deletions

File tree

crates/geo_filters/src/diff_count/sim_hash.rs

Lines changed: 28 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -104,12 +104,12 @@ impl<C: GeoConfig<Diff>> GeoDiffCount<'_, C> {
104104
expected_diff_size: usize,
105105
) -> (impl Iterator<Item = SimHash> + '_, usize) {
106106
let range = self.sim_hash_range(expected_diff_size);
107-
let sim_hash_iter = self.sim_hashes();
108-
let min_matches = sim_hash_iter
107+
let min_matches = range
109108
.len()
110109
.saturating_sub(expected_diff_size)
111110
.max(SIM_BUCKETS / 2);
112-
let filtered_iter = sim_hash_iter
111+
let filtered_iter = self
112+
.sim_hashes()
113113
.skip_while(move |(bucket_id, _)| *bucket_id >= range.end)
114114
.take_while(move |(bucket_id, _)| *bucket_id >= range.start)
115115
.map(|(_, sim_hash)| sim_hash);
@@ -222,3 +222,28 @@ impl BitVec<'_> {
222222
None
223223
}
224224
}
225+
226+
#[cfg(test)]
mod tests {
    use rand::Rng as _;

    use crate::{
        diff_count::{sim_hash::SIM_BUCKETS, GeoDiffCount7},
        test_rng::prng_test_harness,
    };

    /// Checks that the `min_matches` value returned by `sim_hashes_search` is
    /// consistent with the iterator returned alongside it.
    ///
    /// For a pseudorandom filter and a random expected diff size, the reported
    /// `min_matches` must equal the number of sim hashes the iterator actually
    /// yields, minus the expected diff size (saturating at zero), with a lower
    /// bound of `SIM_BUCKETS / 2`.
    #[test]
    fn sim_hash_iter_min_matches() {
        prng_test_harness(100, |rng| {
            // Sample at least 1 here: `random_range(0..i)` below panics on an
            // empty range, which is what `i == 0` would produce.
            let i = rng.random_range(1..1000);
            let filter = GeoDiffCount7::pseudorandom_filter(i);
            let expected_diff = rng.random_range(0..i);

            let (iter, min_matches) = filter.sim_hashes_search(expected_diff);

            // Recompute the threshold from the number of hashes really yielded.
            let actual_count = iter.count();
            let expected_min_matches = actual_count
                .saturating_sub(expected_diff)
                .max(SIM_BUCKETS / 2);

            assert_eq!(
                min_matches, expected_min_matches,
                "min_matches mismatch for filter size {i}, expected diff {expected_diff}, \
                 iterator yielded {actual_count} sim hashes"
            );
        });
    }
}

0 commit comments

Comments
 (0)