diff --git a/crates/catalog/glue/src/schema.rs b/crates/catalog/glue/src/schema.rs index 864320dae4..4f5c1f664a 100644 --- a/crates/catalog/glue/src/schema.rs +++ b/crates/catalog/glue/src/schema.rs @@ -178,6 +178,12 @@ impl SchemaVisitor for GlueSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } + PrimitiveType::Variant => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Glue type is not supported", + )); + } }; Ok(glue_type) diff --git a/crates/catalog/hms/src/schema.rs b/crates/catalog/hms/src/schema.rs index c23b48719d..f48d163b30 100644 --- a/crates/catalog/hms/src/schema.rs +++ b/crates/catalog/hms/src/schema.rs @@ -135,6 +135,12 @@ impl SchemaVisitor for HiveSchemaBuilder { PrimitiveType::Decimal { precision, scale } => { format!("decimal({precision},{scale})") } + PrimitiveType::Variant => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Conversion from Variant to Hive type is not supported", + )); + } }; Ok(hive_type) diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index 9b504421ae..68fc288ce1 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -690,6 +690,10 @@ impl SchemaVisitor for ToArrowSchemaConverter { crate::spec::PrimitiveType::Binary => { Ok(ArrowSchemaOrFieldOrType::Type(DataType::LargeBinary)) } + crate::spec::PrimitiveType::Variant => Err(crate::Error::new( + crate::ErrorKind::FeatureUnsupported, + "Arrow schema conversion for Variant is not yet implemented", + )), } } } @@ -1131,6 +1135,7 @@ pub fn datum_to_arrow_type_with_ree(datum: &Datum) -> DataType { PrimitiveType::Uuid => make_ree(DataType::Binary), PrimitiveType::Fixed(_) => make_ree(DataType::Binary), PrimitiveType::Binary => make_ree(DataType::Binary), + PrimitiveType::Variant => make_ree(DataType::Binary), PrimitiveType::Decimal { precision, scale } => { make_ree(DataType::Decimal128(*precision as u8, *scale as i8)) } diff --git a/crates/iceberg/src/arrow/value.rs b/crates/iceberg/src/arrow/value.rs index d07233c420..e349af2392 100644 --- a/crates/iceberg/src/arrow/value.rs +++ b/crates/iceberg/src/arrow/value.rs @@ -424,6 +424,10 @@ impl SchemaWithPartnerVisitor for ArrowArrayToIcebergStructConverter { )) } } + PrimitiveType::Variant => Err(Error::new( + ErrorKind::FeatureUnsupported, + "Arrow value extraction for Variant is not yet implemented", + )), } } } diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index fdbc680977..dbe70a482f 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -237,6 +237,7 @@ impl SchemaVisitor for SchemaToAvroSchema { PrimitiveType::Uuid => AvroSchema::Uuid, PrimitiveType::Fixed(len) => avro_fixed_schema((*len) as usize)?, PrimitiveType::Binary => AvroSchema::Bytes, + PrimitiveType::Variant => AvroSchema::Bytes, PrimitiveType::Decimal { precision, scale } => { avro_decimal_schema(*precision as usize, *scale as usize)? } diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index ad4aea758f..ecf8ceb0a9 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -247,6 +247,8 @@ pub enum PrimitiveType { Fixed(u64), /// Arbitrary-length byte array. Binary, + /// Semi-structured data type (Iceberg spec v3). Stored in Parquet as `LogicalType::Variant`. + Variant, } impl PrimitiveType { @@ -382,6 +384,7 @@ impl fmt::Display for PrimitiveType { PrimitiveType::Uuid => write!(f, "uuid"), PrimitiveType::Fixed(size) => write!(f, "fixed({size})"), PrimitiveType::Binary => write!(f, "binary"), + PrimitiveType::Variant => write!(f, "variant"), } } } @@ -884,7 +887,8 @@ mod tests { {"id": 13, "name": "uuid_field", "required": true, "type": "uuid"}, {"id": 14, "name": "fixed_field", "required": true, "type": "fixed[10]"}, {"id": 15, "name": "binary_field", "required": true, "type": "binary"}, - {"id": 16, "name": "string_field", "required": true, "type": "string"} + {"id": 16, "name": "string_field", "required": true, "type": "string"}, + {"id": 17, "name": "variant_field", "required": false, "type": "variant"} ] } "#; @@ -964,6 +968,12 @@ mod tests { Type::Primitive(PrimitiveType::String), ) .into(), + NestedField::optional( + 17, + "variant_field", + Type::Primitive(PrimitiveType::Variant), + ) + .into(), ], id_lookup: OnceLock::default(), name_lookup: OnceLock::default(), @@ -1320,4 +1330,25 @@ mod tests { .contains("expected type 'struct'") ); } + + #[test] + fn variant_type_display() { + assert_eq!(PrimitiveType::Variant.to_string(), "variant"); + } + + #[test] + fn variant_type_serde() { + let json = r#"{"id": 1, "name": "v", "required": false, "type": "variant"}"#; + let field: NestedField = serde_json::from_str(json).unwrap(); + assert_eq!(*field.field_type, Type::Primitive(PrimitiveType::Variant)); + let serialized = serde_json::to_string(&field).unwrap(); + assert!(serialized.contains("\"variant\"")); + } + + #[test] + fn variant_type_not_compatible_with_literals() { + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Boolean(true))); + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Int(0))); + assert!(!PrimitiveType::Variant.compatible(&PrimitiveLiteral::Binary(vec![]))); + } } diff --git a/crates/iceberg/src/spec/values/datum.rs b/crates/iceberg/src/spec/values/datum.rs index 68ea6b3d46..46a783a770 100644 --- a/crates/iceberg/src/spec/values/datum.rs +++ b/crates/iceberg/src/spec/values/datum.rs @@ -419,6 +419,7 @@ impl Datum { } PrimitiveType::Fixed(_) => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), + PrimitiveType::Variant => PrimitiveLiteral::Binary(Vec::from(bytes)), PrimitiveType::Decimal { .. } => { PrimitiveLiteral::Int128(i128_from_be_bytes(bytes).ok_or_else(|| { Error::new(