From 1fbb260cf8edd38edbc077fcae7cbc546665f272 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Thu, 28 Aug 2025 23:26:21 +0000 Subject: [PATCH 1/3] [WIP] Allow V2 reader to read v1 manifests --- crates/iceberg/src/spec/manifest_list.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index 64ed364c75..57d025b65e 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -226,6 +226,8 @@ impl ManifestListWriter { /// This is a helper module that defines the schema field of the manifest list entry. mod _const_schema { + + use crate::spec::{Literal,PrimitiveLiteral}; use std::sync::Arc; use apache_avro::Schema as AvroSchema; @@ -233,7 +235,7 @@ mod _const_schema { use crate::avro::schema_to_avro_schema; use crate::spec::{ - ListType, NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type, + ListType, ManifestContentType, NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type }; static MANIFEST_PATH: Lazy = { @@ -268,8 +270,9 @@ mod _const_schema { Arc::new(NestedField::required( 517, "content", - Type::Primitive(PrimitiveType::Int), - )) + Type::Primitive(PrimitiveType::Int)).with_initial_default( + Literal::Primitive(PrimitiveLiteral::Int(ManifestContentType::Data as i32))) + ) }) }; static SEQUENCE_NUMBER: Lazy = { @@ -277,8 +280,10 @@ mod _const_schema { Arc::new(NestedField::required( 515, "sequence_number", - Type::Primitive(PrimitiveType::Long), - )) + Type::Primitive(PrimitiveType::Long)).with_initial_default( + Literal::Primitive(PrimitiveLiteral::Long(0)) + ) + ) }) }; static MIN_SEQUENCE_NUMBER: Lazy = { @@ -286,8 +291,10 @@ mod _const_schema { Arc::new(NestedField::required( 516, "min_sequence_number", - Type::Primitive(PrimitiveType::Long), - )) + Type::Primitive(PrimitiveType::Long)).with_initial_default( + Literal::Primitive(PrimitiveLiteral::Long(0)) + ) + ) }) }; static ADDED_SNAPSHOT_ID: Lazy = { @@ -1304,7 +1311,7 @@ mod test { let io = FileIOBuilder::new_fs_io().build().unwrap(); let output_file = io.new_output(path.to_str().unwrap()).unwrap(); - let mut writer = ManifestListWriter::v2(output_file, 1646658105718557341, Some(0), 1); + let mut writer = ManifestListWriter::v1(output_file, 1646658105718557341, Some(0)); writer .add_manifests(expected_manifest_list.entries.clone().into_iter()) .unwrap(); From 4b109f3f223e45b67fbced492a150331805667ca Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Fri, 29 Aug 2025 05:51:49 +0000 Subject: [PATCH 2/3] defaults belong in serde --- crates/iceberg/src/spec/manifest_list.rs | 35 +++++++++++++++--------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index 57d025b65e..f9970478fe 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -227,7 +227,6 @@ impl ManifestListWriter { /// This is a helper module that defines the schema field of the manifest list entry. mod _const_schema { - use crate::spec::{Literal,PrimitiveLiteral}; use std::sync::Arc; use apache_avro::Schema as AvroSchema; @@ -235,7 +234,7 @@ mod _const_schema { use crate::avro::schema_to_avro_schema; use crate::spec::{ - ListType, ManifestContentType, NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type + ListType, NestedField, NestedFieldRef, PrimitiveType, Schema, StructType, Type, }; static MANIFEST_PATH: Lazy = { @@ -270,9 +269,8 @@ mod _const_schema { Arc::new(NestedField::required( 517, "content", - Type::Primitive(PrimitiveType::Int)).with_initial_default( - Literal::Primitive(PrimitiveLiteral::Int(ManifestContentType::Data as i32))) - ) + Type::Primitive(PrimitiveType::Int), + )) }) }; static SEQUENCE_NUMBER: Lazy = { @@ -280,10 +278,8 @@ mod _const_schema { Arc::new(NestedField::required( 515, "sequence_number", - Type::Primitive(PrimitiveType::Long)).with_initial_default( - Literal::Primitive(PrimitiveLiteral::Long(0)) - ) - ) + Type::Primitive(PrimitiveType::Long), + )) }) }; static MIN_SEQUENCE_NUMBER: Lazy = { @@ -291,10 +287,8 @@ mod _const_schema { Arc::new(NestedField::required( 516, "min_sequence_number", - Type::Primitive(PrimitiveType::Long)).with_initial_default( - Literal::Primitive(PrimitiveLiteral::Long(0)) - ) - ) + Type::Primitive(PrimitiveType::Long), + )) }) }; static ADDED_SNAPSHOT_ID: Lazy = { @@ -803,8 +797,11 @@ pub(super) mod _serde { pub manifest_path: String, pub manifest_length: i64, pub partition_spec_id: i32, + #[serde(default = "v2_default_content_for_v1")] pub content: i32, + #[serde(default = "v2_default_sequence_number_for_v1")] pub sequence_number: i64, + #[serde(default = "v2_default_min_sequence_number_for_v1")] pub min_sequence_number: i64, pub added_snapshot_id: i64, #[serde(alias = "added_data_files_count", alias = "added_files_count")] @@ -843,6 +840,18 @@ pub(super) mod _serde { } } + fn v2_default_content_for_v1() -> i32 { + super::ManifestContentType::Data as i32 + } + + fn v2_default_sequence_number_for_v1() -> i64 { + 0 + } + + fn v2_default_min_sequence_number_for_v1() -> i64 { + 0 + } + impl ManifestFileV1 { /// Converts the [ManifestFileV1] into a [ManifestFile]. pub fn try_into(self) -> Result { From f470eeb98773c2b96e3a6eafb05756bb916dc5b0 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Fri, 29 Aug 2025 05:52:57 +0000 Subject: [PATCH 3/3] remove whitespace --- crates/iceberg/src/spec/manifest_list.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index f9970478fe..43808bb249 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -226,7 +226,6 @@ impl ManifestListWriter { /// This is a helper module that defines the schema field of the manifest list entry. mod _const_schema { - use std::sync::Arc; use apache_avro::Schema as AvroSchema;