@@ -18,6 +18,7 @@ mod sim_hash;
1818
1919use bitvec:: * ;
2020pub use config:: { GeoDiffConfig13 , GeoDiffConfig7 } ;
21+ pub use sim_hash:: { SimHash , SIM_BUCKETS , SIM_BUCKET_SIZE } ;
2122
2223/// Diff count filter with a relative error standard deviation of ~0.125.
2324pub type GeoDiffCount7 < ' a > = GeoDiffCount < ' a , GeoDiffConfig7 > ;
@@ -302,7 +303,7 @@ impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
302303
303304 /// Create a new [`GeoDiffCount`] from a slice of bytes
304305 #[ cfg( target_endian = "little" ) ]
305- pub fn from_bytes ( c : C , buf : & ' a [ u8 ] ) -> Self {
306+ pub fn from_bytes_with_config ( c : C , buf : & ' a [ u8 ] ) -> Self {
306307 if buf. is_empty ( ) {
307308 return Self :: new ( c) ;
308309 }
@@ -338,6 +339,53 @@ impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
338339 bytes_written += self . lsb . write ( writer) ?;
339340 Ok ( bytes_written)
340341 }
342+
/// Builds a filter from an explicit list of buckets, using the given `config`.
///
/// Starts from the filter returned by `Self::new(config)` and calls `xor_bit`
/// once for every bucket yielded by `ones`, in iteration order.
///
/// Only compiled for tests or when the `test-support` feature is enabled.
#[cfg(any(test, feature = "test-support"))]
pub fn from_ones_with_config(config: C, ones: impl IntoIterator<Item = C::BucketType>) -> Self {
    ones.into_iter().fold(Self::new(config), |mut filter, bucket| {
        filter.xor_bit(bucket);
        filter
    })
}
351+
/// Returns an iterator over the buckets whose bit is set in this filter.
///
/// The filter's bit chunks are fed to the free `iter_ones` helper and every
/// yielded position is converted with `C::BucketType::from_usize`.
///
/// Only compiled for tests or when the `test-support` feature is enabled.
#[cfg(any(test, feature = "test-support"))]
pub fn iter_ones(&self) -> impl Iterator<Item = C::BucketType> + '_ {
    let chunks = self.bit_chunks().peekable();
    let set_positions = iter_ones(chunks);
    set_positions.map(C::BucketType::from_usize)
}
356+
/// Generates a reproducible pseudo-random filter holding `items` hashes.
///
/// A ChaCha12 RNG is seeded with `items` itself, and `items` consecutive
/// `u64` values drawn from it are inserted via `push_hash`. Because the seed
/// depends only on `items`, the same item count always produces the same
/// sequence of hashes and therefore the same filter for a given `config`.
///
/// Only compiled for tests or when the `test-support` feature is enabled.
#[cfg(any(test, feature = "test-support"))]
pub fn pseudorandom_filter_with_config(config: C, items: usize) -> Self {
    use rand::RngCore;
    use rand_chacha::{rand_core::SeedableRng, ChaCha12Rng};

    let mut filter = Self::new(config);
    let mut rng = ChaCha12Rng::seed_from_u64(items as u64);
    // Draw exactly `items` hashes, feeding each one to the filter as it is produced.
    std::iter::repeat_with(|| rng.next_u64())
        .take(items)
        .for_each(|hash| {
            filter.push_hash(hash);
        });
    filter
}
372+ }
373+
374+ impl < ' a , C : GeoConfig < Diff > + Default > GeoDiffCount < ' a , C > {
375+ #[ cfg( target_endian = "little" ) ]
376+ pub fn from_bytes ( buf : & ' a [ u8 ] ) -> Self {
377+ Self :: from_bytes_with_config ( C :: default ( ) , buf)
378+ }
379+
380+ #[ cfg( any( test, feature = "test-support" ) ) ]
381+ pub fn from_ones ( ones : impl IntoIterator < Item = C :: BucketType > ) -> Self {
382+ Self :: from_ones_with_config ( C :: default ( ) , ones)
383+ }
384+
385+ #[ cfg( any( test, feature = "test-support" ) ) ]
386+ pub fn pseudorandom_filter ( items : usize ) -> Self {
387+ Self :: pseudorandom_filter_with_config ( C :: default ( ) , items)
388+ }
341389}
342390
343391/// Applies a repeated bit mask to the underlying filter.
@@ -419,11 +467,12 @@ mod tests {
419467 use std:: io:: Write ;
420468
421469 use itertools:: Itertools ;
422- use rand:: { rngs:: StdRng , seq:: IteratorRandom , RngCore } ;
470+ use rand:: { seq:: IteratorRandom , RngCore } ;
471+ use rand_chacha:: ChaCha12Rng ;
423472
424473 use crate :: {
425474 build_hasher:: UnstableDefaultBuildHasher ,
426- config:: { iter_ones , tests:: test_estimate, FixedConfig } ,
475+ config:: { tests:: test_estimate, FixedConfig } ,
427476 test_rng:: prng_test_harness,
428477 } ;
429478
@@ -458,8 +507,8 @@ mod tests {
458507
459508 #[ test]
460509 fn test_xor ( ) {
461- let a = GeoDiffCount7 :: from_ones ( Default :: default ( ) , 0 ..1000 ) ;
462- let b = GeoDiffCount7 :: from_ones ( Default :: default ( ) , 10 ..1010 ) ;
510+ let a = GeoDiffCount7 :: from_ones ( 0 ..1000 ) ;
511+ let b = GeoDiffCount7 :: from_ones ( 10 ..1010 ) ;
463512 let c = xor ( & a, & b) ;
464513 let d = xor ( & a, & b) ;
465514 assert_eq ! ( a. iter_ones( ) . count( ) , 1000 ) ;
@@ -479,7 +528,7 @@ mod tests {
479528 m. xor_bit ( 10 ) ;
480529 assert ! ( m. iter_ones( ) . collect_vec( ) . is_empty( ) ) ;
481530
482- let mut m = GeoDiffCount7 :: from_ones ( Default :: default ( ) , 0 ..100 ) ;
531+ let mut m = GeoDiffCount7 :: from_ones ( 0 ..100 ) ;
483532 assert_eq ! ( m. iter_ones( ) . count( ) , 100 ) ;
484533 m. xor_bit ( 10 ) ;
485534 assert_eq ! ( m. iter_ones( ) . count( ) , 99 ) ;
@@ -561,20 +610,19 @@ mod tests {
561610 // masked bitset : 010000 100100 000000
562611 // after compression : 01 0 10 1 00 0
563612 // bitset of the returned filter : 010 101000
564- let m = GeoDiffCount7 :: from_ones ( Default :: default ( ) , [ 16 , 15 , 13 , 11 , 9 , 8 , 6 , 3 , 1 ] ) ;
613+ let m = GeoDiffCount7 :: from_ones ( [ 16 , 15 , 13 , 11 , 9 , 8 , 6 , 3 , 1 ] ) ;
565614 let n = masked ( & m, 0b110100 , 6 ) ;
566615 assert_eq ! ( n. iter_ones( ) . collect_vec( ) , vec![ 16 , 11 , 8 ] ) ;
567616
568617 for i in 0 ..100 {
569- let m = GeoDiffCount7 :: from_ones ( Default :: default ( ) , ( 0 ..i) . collect_vec ( ) ) ;
618+ let m = GeoDiffCount7 :: from_ones ( ( 0 ..i) . collect_vec ( ) ) ;
570619 let n = masked ( & m, 0b111 , 3 ) ;
571620 assert_eq ! ( m, n) ;
572621 }
573622
574623 for i in 0 ..300 {
575- let m = GeoDiffCount7 :: from_ones ( Default :: default ( ) , ( 0 ..i) . collect_vec ( ) ) ;
576- let slow =
577- GeoDiffCount :: from_ones ( Default :: default ( ) , masked ( & m, 0b110 , 3 ) . iter_ones ( ) ) ;
624+ let m = GeoDiffCount7 :: from_ones ( ( 0 ..i) . collect_vec ( ) ) ;
625+ let slow = GeoDiffCount :: from_ones ( masked ( & m, 0b110 , 3 ) . iter_ones ( ) ) ;
578626 let n = masked ( & m, 0b110 , 3 ) ;
579627 assert_eq ! ( slow, n, "in iteration: {i}" ) ;
580628 }
@@ -626,20 +674,6 @@ mod tests {
626674 assert_eq ! ( vec![ 17 , 11 , 7 ] , a. msb. iter( ) . copied( ) . collect_vec( ) ) ;
627675 }
628676
629- impl < C : GeoConfig < Diff > > GeoDiffCount < ' _ , C > {
630- fn from_ones ( config : C , ones : impl IntoIterator < Item = C :: BucketType > ) -> Self {
631- let mut result = Self :: new ( config) ;
632- for one in ones {
633- result. xor_bit ( one) ;
634- }
635- result
636- }
637-
638- fn iter_ones ( & self ) -> impl Iterator < Item = C :: BucketType > + ' _ {
639- iter_ones ( self . bit_chunks ( ) . peekable ( ) ) . map ( C :: BucketType :: from_usize)
640- }
641- }
642-
643677 #[ test]
644678 fn test_serialization_empty ( ) {
645679 let before = GeoDiffCount7 :: default ( ) ;
@@ -649,15 +683,15 @@ mod tests {
649683
650684 assert_eq ! ( writer. len( ) , 0 ) ;
651685
652- let after = GeoDiffCount7 :: from_bytes ( before. config . clone ( ) , & writer) ;
686+ let after = GeoDiffCount7 :: from_bytes_with_config ( before. config . clone ( ) , & writer) ;
653687
654688 assert_eq ! ( before, after) ;
655689 }
656690
657691 // This helper exists in order to easily test serializing types with different
658692 // bucket types in the MSB sparse bit field representation. See tests below.
659693 #[ cfg( target_endian = "little" ) ]
660- fn serialization_round_trip < C : GeoConfig < Diff > + Default > ( rnd : & mut StdRng ) {
694+ fn serialization_round_trip < C : GeoConfig < Diff > + Default > ( rnd : & mut ChaCha12Rng ) {
661695 // Run 100 simulations of random values being put into
662696 // a diff counter. "Serializing" to a vector to emulate
663697 // writing to a disk, and then deserializing and asserting
@@ -676,7 +710,10 @@ mod tests {
676710 let pad_amount = ( 0 ..8 ) . choose ( rnd) . unwrap ( ) ;
677711 writer. write_all ( & padding[ ..pad_amount] ) . unwrap ( ) ;
678712 before. write ( & mut writer) . unwrap ( ) ;
679- let after = GeoDiffCount :: < ' _ , C > :: from_bytes ( before. config . clone ( ) , & writer[ pad_amount..] ) ;
713+ let after = GeoDiffCount :: < ' _ , C > :: from_bytes_with_config (
714+ before. config . clone ( ) ,
715+ & writer[ pad_amount..] ,
716+ ) ;
680717 assert_eq ! ( before, after) ;
681718 }
682719
0 commit comments