Skip to content

Commit f6bc74b

Browse files
authored
Merge pull request #76 from github/sc-20250730-size-as-usize
Add rounding versions of size methods on `Count`
2 parents 3e95c46 + eabeed9 commit f6bc74b

7 files changed

Lines changed: 44 additions & 18 deletions

File tree

crates/geo_filters/README.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ c2.push(2);
3333
c2.push(3);
3434

3535
let estimated_size = c1.size_with_sketch(&c2);
36-
assert!(estimated_size >= 3.0_f32 * 0.9 &&
37-
estimated_size <= 3.0_f32 * 1.1);
36+
assert_eq!(estimated_size, 3);
3837
```
3938

4039
## Background

crates/geo_filters/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,7 +374,7 @@ pub(crate) mod tests {
374374
m.push_hash(rnd.next_u64());
375375
}
376376
// Compute the relative error between estimate and actually inserted items.
377-
let high_precision = m.size() / cnt as f32 - 1.0;
377+
let high_precision = m.size_f32() / cnt as f32 - 1.0;
378378
// Take the average over trials many attempts.
379379
avg_precision += high_precision / trials as f32;
380380
avg_var += high_precision.powf(2.0) / trials as f32;

crates/geo_filters/src/diff_count.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -443,11 +443,11 @@ impl<C: GeoConfig<Diff>> Count<Diff> for GeoDiffCount<'_, C> {
443443
*self = xor(self, other);
444444
}
445445

446-
fn size(&self) -> f32 {
446+
fn size_f32(&self) -> f32 {
447447
self.estimate_size()
448448
}
449449

450-
fn size_with_sketch(&self, other: &Self) -> f32 {
450+
fn size_with_sketch_f32(&self, other: &Self) -> f32 {
451451
assert!(
452452
self.config == other.config,
453453
"combined filters must have the same configuration"
@@ -501,7 +501,7 @@ mod tests {
501501
let mut geo_count = GeoDiffCount13::default();
502502

503503
(0..n).for_each(|i| geo_count.push(i));
504-
assert_eq!(result, geo_count.size());
504+
assert_eq!(result, geo_count.size_f32());
505505
}
506506
}
507507

crates/geo_filters/src/distinct_count.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
143143
*self = or(self, other)
144144
}
145145

146-
fn size(&self) -> f32 {
146+
fn size_f32(&self) -> f32 {
147147
let lowest_bucket = self.lsb.bit_range().start;
148148
let total = self.msb.len()
149149
+ self
@@ -159,7 +159,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
159159
}
160160
}
161161

162-
fn size_with_sketch(&self, other: &Self) -> f32 {
162+
fn size_with_sketch_f32(&self, other: &Self) -> f32 {
163163
assert!(
164164
self.config == other.config,
165165
"combined filters must have the same configuration"
@@ -272,7 +272,7 @@ mod tests {
272272
] {
273273
let mut geo_count = GeoDistinctCount13::default();
274274
(0..n).for_each(|i| geo_count.push(i));
275-
assert_eq!(result, geo_count.size());
275+
assert_eq!(result, geo_count.size_f32());
276276
}
277277
}
278278

crates/geo_filters/src/evaluation/hll.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ impl<C: HllConfig> Count<Distinct> for Hll<C> {
126126
unimplemented!()
127127
}
128128

129-
fn size(&self) -> f32 {
129+
fn size_f32(&self) -> f32 {
130130
self.inner.borrow_mut().count() as f32
131131
}
132132

133-
fn size_with_sketch(&self, _other: &Self) -> f32 {
133+
fn size_with_sketch_f32(&self, _other: &Self) -> f32 {
134134
unimplemented!()
135135
}
136136

crates/geo_filters/src/evaluation/simulation.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ impl<C: GeoConfig<Diff> + Clone> SimulationCount for GeoDiffCount<'_, C> {
2525
<Self as Count<_>>::push_hash(self, hash)
2626
}
2727
fn size(&self) -> f32 {
28-
<Self as Count<_>>::size(self)
28+
<Self as Count<_>>::size_f32(self)
2929
}
3030
fn bytes_in_memory(&self) -> usize {
3131
<Self as Count<_>>::bytes_in_memory(self)
@@ -36,7 +36,7 @@ impl<C: GeoConfig<Distinct>> SimulationCount for GeoDistinctCount<'_, C> {
3636
<Self as Count<_>>::push_hash(self, hash)
3737
}
3838
fn size(&self) -> f32 {
39-
<Self as Count<_>>::size(self)
39+
<Self as Count<_>>::size_f32(self)
4040
}
4141
fn bytes_in_memory(&self) -> usize {
4242
<Self as Count<_>>::bytes_in_memory(self)
@@ -47,7 +47,7 @@ impl<C: HllConfig> SimulationCount for Hll<C> {
4747
<Self as Count<_>>::push_hash(self, hash)
4848
}
4949
fn size(&self) -> f32 {
50-
<Self as Count<_>>::size(self)
50+
<Self as Count<_>>::size_f32(self)
5151
}
5252
fn bytes_in_memory(&self) -> usize {
5353
<Self as Count<_>>::bytes_in_memory(self)

crates/geo_filters/src/lib.rs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,17 +43,44 @@ pub trait Count<M: Method> {
4343
/// If only the size of the combined set is needed, [`Self::size_with_sketch`] is more efficient and should be used.
4444
fn push_sketch(&mut self, other: &Self);
4545

46-
/// Return the estimated set size.
47-
fn size(&self) -> f32;
46+
/// Return the estimated set size rounded to the nearest unsigned integer.
47+
fn size(&self) -> usize {
48+
let size = self.size_f32().round();
49+
debug_assert_f32s_in_range(size);
50+
size as usize
51+
}
4852

49-
/// Return the estimated set size when combined with the given sketch.
53+
/// Return the estimated set size as a real number.
54+
fn size_f32(&self) -> f32;
55+
56+
/// Return the estimated set size when combined with the given sketch rounded to the nearest unsigned integer.
57+
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
58+
fn size_with_sketch(&self, other: &Self) -> usize {
59+
let size = self.size_with_sketch_f32(other).round();
60+
debug_assert_f32s_in_range(size);
61+
size as usize
62+
}
63+
64+
/// Return the estimated set size when combined with the given sketch as a real number.
5065
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
51-
fn size_with_sketch(&self, other: &Self) -> f32;
66+
fn size_with_sketch_f32(&self, other: &Self) -> f32;
5267

5368
/// Returns the number of bytes in memory used to represent this filter.
5469
fn bytes_in_memory(&self) -> usize;
5570
}
5671

72+
#[inline]
73+
fn debug_assert_f32s_in_range(v: f32) {
74+
// The geometric filter should never produce these values.
75+
// These assertions failing indicates that there is a bug.
76+
debug_assert!(v.is_finite(), "Estimated size must be finite, got {v}");
77+
debug_assert!(v >= 0.0, "Estimated size must be non-negative, got {v}");
78+
debug_assert!(
79+
v <= usize::MAX as f32,
80+
"Estimated size {v} exceeds usize::MAX",
81+
);
82+
}
83+
5784
#[doc = include_str!("../README.md")]
5885
#[cfg(doctest)]
5986
pub struct ReadmeDocTests;

0 commit comments

Comments
 (0)