Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
4927566
Checkpoint.
Apr 15, 2026
f501d39
More progress.
Apr 16, 2026
2770dc9
End-to-end example working.
Apr 16, 2026
7ae3548
Remove auxiliary test files.
Apr 16, 2026
fbb1250
Upload AI handoff document.
Apr 16, 2026
0b62cbe
wired in bool save/load path + vec<bool> + tests; also fixed mutable …
suhasjs May 5, 2026
2134047
tests in diskann-record/src/lib.rs now use tempfile::tempdir() for au…
suhasjs May 5, 2026
5c9eca1
BufWriter now consumed by Writer::finish() + propagate errors using s…
suhasjs May 5, 2026
18c07cd
added compile time assertion to fail builds if usize::NBITS != 64
suhasjs May 13, 2026
f626191
added explicit null Value for Option<T>
suhasjs May 14, 2026
8d472c0
added Enum support with alongside
suhasjs May 14, 2026
d99e86e
Cleaned up Deserialize impl for Value<'a> by removing PhantomData; no…
suhasjs May 15, 2026
3c6ed2e
Cleaned up panic on errors --> add new lighterrors + propagate errors…
suhasjs May 15, 2026
9763a34
added Load/Save impl for Metric; roundtrip test + invalid variant tes…
suhasjs May 15, 2026
98547d7
added Load/Save impls for Config, IndexConfiguration (and a few enums…
suhasjs May 15, 2026
ad166fb
bridged save::Writer from diskann-record with StorageWriteProvider in…
suhasjs May 15, 2026
7a9c928
Added Save/Load impls for MemoryVectorProviderAsync; also modified lo…
suhasjs May 15, 2026
2d4e845
added Load/Save impls for FastMemoryVectorProviderAsync and SimpleNei…
suhasjs May 15, 2026
b88d3c8
added Load/Save impl for DefaultProvider + tests
suhasjs May 15, 2026
9f40c62
added Load/Save impl for DiskANNIndex<> + tests --> Can now load/save…
suhasjs May 16, 2026
80446ed
added a sample output captured from DiskANNIndex test case
suhasjs May 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ members = [
"diskann-benchmark-simd",
"diskann-benchmark",
"diskann-tools",
"vectorset",
"vectorset", "diskann-record",
]

default-members = [
Expand Down Expand Up @@ -54,6 +54,7 @@ diskann-linalg = { path = "diskann-linalg", version = "0.50.0" }
diskann-utils = { path = "diskann-utils", default-features = false, version = "0.50.0" }
diskann-quantization = { path = "diskann-quantization", default-features = false, version = "0.50.0" }
diskann-platform = { path = "diskann-platform", version = "0.50.0" }
diskann-record = { path = "diskann-record", version = "0.50.0" }
# Algorithm
diskann = { path = "diskann", version = "0.50.0" }
# Providers
Expand Down Expand Up @@ -85,7 +86,6 @@ iai-callgrind = "0.14.0"
itertools = "0.13.0"
num-traits = "0.2.15"
num_cpus = "1.16.0"
once_cell = "1.19.0"
opentelemetry = "0.30.0"
opentelemetry_sdk = "0.30.0"
paste = "1.0.15"
Expand Down
1 change: 1 addition & 0 deletions diskann-providers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ diskann-linalg = { workspace = true }
diskann = { workspace = true }
diskann-utils = { workspace = true }
diskann-quantization = { workspace = true, features = ["rayon"] }
diskann-record = { workspace = true }
tokio = { workspace = true, features = ["rt", "rt-multi-thread"] }
tempfile = { workspace = true, optional = true }
bf-tree = { workspace = true, optional = true }
Expand Down
98 changes: 96 additions & 2 deletions diskann-providers/src/index/wrapped_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,15 @@ mod tests {
};
use diskann_utils::test_data_root;
use diskann_vector::distance::Metric;
use rand::Rng;

use super::DiskANNIndex;
use super::{DiskANNIndex, create_current_thread_runtime};
use crate::{
index::diskann_async,
model::{
configuration::IndexConfiguration,
graph::provider::async_::{
common::{FullPrecision, TableBasedDeletes},
common::{FullPrecision, NoDeletes, NoStore, TableBasedDeletes},
inmem::{self, CreateFullPrecision, DefaultProvider},
},
},
Expand Down Expand Up @@ -537,4 +538,97 @@ mod tests {
assert_eq!(ids[0], 0);
assert_eq!(distances[0], 0.0);
}

/////////////////////////////////
// diskann-record round-trips //
/////////////////////////////////

#[test]
fn test_diskann_record_save_load_round_trip() {
    // Index shape: 8-dimensional vectors, room for 32 points, 24 actually inserted.
    let dim = 8;
    let max_points = 32;
    let num_points = 24;

    // Fixed-seed synthetic vectors keep the test hermetic (no on-disk fixture).
    let mut rng = create_rnd_from_seed_in_tests(0x9c6a1c3b29f74e51);
    let mut train_data: Vec<Vec<f32>> = Vec::new();
    for _ in 0..num_points {
        let row: Vec<f32> = (0..dim).map(|_| rng.random_range(-1.0..1.0)).collect();
        train_data.push(row);
    }

    // Build an empty full-precision provider (`NoStore`, `NoDeletes`) and wrap it in
    // the synchronous index facade.
    let (build_config, parameters) =
        diskann_async::simplified_builder(20, 16, Metric::L2, dim, max_points, |_| {}).unwrap();
    let precursor =
        CreateFullPrecision::new(parameters.dim, parameters.prefetch_cache_line_level);
    let provider =
        DefaultProvider::new_empty(parameters, precursor, NoStore, NoDeletes).unwrap();
    let index = DiskANNIndex::new_with_current_thread_runtime(build_config.clone(), provider);

    // Insert every vector, using its position in `train_data` as the external id.
    let ctx = DefaultContext;
    for (slot, vector) in train_data.iter().enumerate() {
        let id = slot as u32;
        index
            .insert(FullPrecision, &ctx, &id, vector.as_slice())
            .unwrap();
    }

    // Baseline: search the freshly built index with the first training vector.
    let top_k = 5;
    let search_l = 20;
    let query = train_data[0].as_slice();

    let knn_before = graph::search::Knn::new_default(top_k, search_l).unwrap();
    let mut ids_before = vec![0u32; top_k];
    let mut dists_before = vec![0.0f32; top_k];
    let mut sink_before =
        search_output_buffer::IdDistance::new(&mut ids_before, &mut dists_before);
    let stats_before = index
        .search(knn_before, &FullPrecision, &ctx, query, &mut sink_before)
        .unwrap();
    assert_eq!(stats_before.result_count, top_k as u32);
    // The query is a member of the dataset, so the best hit is itself at distance 0.
    assert_eq!(ids_before[0], 0);
    assert_eq!(dists_before[0], 0.0);

    // Persist the inner (async) index through diskann-record into a temp directory.
    let dir = tempfile::tempdir().expect("tempdir");
    let root = dir.path();
    let manifest = root.join("manifest.json");
    diskann_record::save::save_to_disk(&*index.inner, root, &manifest).expect("save_to_disk");

    // Reload it and re-wrap the result in a sync facade with its own runtime.
    type TestProvider = inmem::FullPrecisionProvider<f32, NoStore, NoDeletes>;
    let loaded_inner: graph::DiskANNIndex<TestProvider> =
        diskann_record::load::load_from_disk(&manifest, root).expect("load_from_disk");
    let (rt, rt_handle) = create_current_thread_runtime();
    let loaded: DiskANNIndex<TestProvider> = DiskANNIndex {
        inner: Arc::new(loaded_inner),
        _runtime: Some(rt),
        handle: rt_handle,
    };

    // Repeat the identical search against the reloaded index.
    let knn_after = graph::search::Knn::new_default(top_k, search_l).unwrap();
    let mut ids_after = vec![0u32; top_k];
    let mut dists_after = vec![0.0f32; top_k];
    let mut sink_after = search_output_buffer::IdDistance::new(&mut ids_after, &mut dists_after);
    let stats_after = loaded
        .search(knn_after, &FullPrecision, &ctx, query, &mut sink_after)
        .unwrap();

    // The reloaded index must reproduce the pre-save results exactly.
    assert_eq!(stats_before.result_count, stats_after.result_count);
    assert_eq!(ids_before, ids_after);
    assert_eq!(dists_before, dists_after);
}
}
121 changes: 121 additions & 0 deletions diskann-providers/src/model/configuration/index_configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,78 @@ impl IndexConfiguration {
}
}

//////////////////////////////////
// diskann-record Save/Load //
//////////////////////////////////
//
// The wire format preserves the same fields that `IndexConfiguration::new` takes
// (`config`, `num_threads`, `dist_metric`, `dim`, `max_points`, `num_frozen_pts`) plus
// `random_seed`, because the seed is part of reproducibility. The prefetch tunables
// (`prefetch_lookahead`, `prefetch_cache_line_level`) are intentionally not persisted;
// they are deployment knobs, not part of the index itself, so loaders apply their own
// defaults (`None`).

/// Serializes an [`IndexConfiguration`] through `diskann_record`.
///
/// The record carries `config`, `num_threads`, `dist_metric`, `dim`, `max_points`,
/// `num_frozen_pts` and `random_seed`. `prefetch_lookahead` and
/// `prefetch_cache_line_level` are not written.
impl diskann_record::save::Save for IndexConfiguration {
    /// Version tag attached to records written by this impl.
    const VERSION: diskann_record::Version = diskann_record::Version::new(0, 0, 0);

    /// Builds the record for `self` from the persisted fields listed below.
    fn save(
        &self,
        context: diskann_record::save::Context<'_>,
    ) -> diskann_record::save::Result<diskann_record::save::Record<'_>> {
        let record: diskann_record::save::Record<'_> = diskann_record::save_fields!(
            self,
            context,
            [
                config,
                num_threads,
                dist_metric,
                dim,
                max_points,
                num_frozen_pts,
                random_seed,
            ]
        );
        Ok(record)
    }
}

impl diskann_record::load::Load<'_> for IndexConfiguration {
const VERSION: diskann_record::Version = diskann_record::Version::new(0, 0, 0);

fn load(
object: diskann_record::load::Object<'_>,
) -> diskann_record::load::Result<Self> {
diskann_record::load_fields!(
object,
[
config: Config,
num_threads: usize,
dist_metric: Metric,
dim: usize,
max_points: usize,
num_frozen_pts: NonZeroUsize,
random_seed: Option<u64>,
]
);
Ok(Self {
config,
num_threads,
dist_metric,
dim,
max_points,
num_frozen_pts,
prefetch_lookahead: None,
prefetch_cache_line_level: None,
random_seed,
})
}

fn load_legacy(
_object: diskann_record::load::Object<'_>,
) -> diskann_record::load::Result<Self> {
Err(diskann_record::load::error::Kind::UnknownVersion.into())
}
}

#[cfg(test)]
mod tests {
use diskann::utils::ONE;
Expand Down Expand Up @@ -177,4 +249,53 @@ mod tests {
index_configuration.config.pruned_degree().get()
);
}

/////////////////////////////////
// diskann-record round-trips //
/////////////////////////////////

/// Saves `value` into a fresh temporary directory with `diskann_record`, then loads
/// it back from the same manifest and returns the loaded copy.
///
/// Panics if the temporary directory cannot be created or if saving/loading fails.
fn round_trip_helper<T>(value: &T) -> T
where
    T: diskann_record::save::Saveable + for<'a> diskann_record::load::Loadable<'a>,
{
    let scratch = tempfile::tempdir().expect("tempdir");
    let root = scratch.path();
    let manifest = root.join("manifest.json");

    diskann_record::save::save_to_disk(value, root, &manifest).expect("save_to_disk");

    // The target type is fixed by this function's return type.
    diskann_record::load::load_from_disk(&manifest, root).expect("load_from_disk")
}

#[test]
fn index_configuration_round_trips_minimal() {
    // A configuration with no seed and no prefetch settings must survive unchanged.
    let original = IndexConfiguration::new(Metric::L2, 128, 1000, ONE, 1, config());
    let restored = round_trip_helper(&original);
    assert_eq!(original, restored);
}

#[test]
fn index_configuration_round_trips_preserves_random_seed() {
    // The seed is part of the persisted record, so it must come back verbatim.
    const SEED: u64 = 0xDEAD_BEEF_CAFE_F00D;

    let original = IndexConfiguration::new(Metric::Cosine, 64, 500, ONE, 4, config())
        .with_pseudo_rng_from_seed(SEED);
    let restored = round_trip_helper(&original);

    assert_eq!(original, restored);
    assert_eq!(restored.random_seed, Some(SEED));
}

#[test]
fn index_configuration_round_trips_drops_prefetch_fields() {
    // Start from a configuration that has both prefetch tunables set. Neither is
    // persisted, so the loaded copy may differ from the original on those two only.
    let original = IndexConfiguration::new(Metric::L2, 128, 1000, ONE, 1, config())
        .with_prefetch_lookahead(NonZeroUsize::new(8))
        .with_prefetch_cache_line_level(Some(PrefetchCacheLineLevel::CacheLine8));

    let restored = round_trip_helper(&original);
    assert_eq!(restored.prefetch_lookahead, None);
    assert_eq!(restored.prefetch_cache_line_level, None);

    // With the two prefetch fields cleared, the original must equal the loaded copy.
    let expected = IndexConfiguration {
        prefetch_lookahead: None,
        prefetch_cache_line_level: None,
        ..original.clone()
    };
    assert_eq!(expected, restored);
}
}
Loading