diff --git a/crates/iceberg/src/spec/values/literal.rs b/crates/iceberg/src/spec/values/literal.rs index e82fa197cd..1da7b27b61 100644 --- a/crates/iceberg/src/spec/values/literal.rs +++ b/crates/iceberg/src/spec/values/literal.rs @@ -422,6 +422,17 @@ impl Literal { } } + fn parse_hex_bytes(s: &str) -> Result> { + let invalid = || Error::new(ErrorKind::DataInvalid, format!("invalid hex string: {s}")); + if !s.is_ascii() || !s.len().is_multiple_of(2) { + return Err(invalid()); + } + (0..s.len()) + .step_by(2) + .map(|i| u8::from_str_radix(&s[i..i + 2], 16).map_err(|_| invalid())) + .collect() + } + /// Create iceberg value from a json value /// /// See [this spec](https://iceberg.apache.org/spec/#json-single-value-serialization) for reference. @@ -499,8 +510,22 @@ impl Literal { (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive( PrimitiveLiteral::UInt128(Uuid::parse_str(&s)?.as_u128()), ))), - (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(), - (PrimitiveType::Binary, JsonValue::String(_)) => todo!(), + (PrimitiveType::Fixed(n), JsonValue::String(s)) => { + let bytes = Self::parse_hex_bytes(&s)?; + if bytes.len() as u64 != *n { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "fixed literal length mismatch: expected {n}, got {}", + bytes.len() + ), + )); + } + Ok(Some(Literal::Primitive(PrimitiveLiteral::Binary(bytes)))) + } + (PrimitiveType::Binary, JsonValue::String(s)) => Ok(Some(Literal::Primitive( + PrimitiveLiteral::Binary(Self::parse_hex_bytes(&s)?), + ))), ( PrimitiveType::Decimal { precision: _, @@ -662,7 +687,7 @@ impl Literal { (_, PrimitiveLiteral::Binary(val)) => Ok(JsonValue::String(val.iter().fold( String::new(), |mut acc, x| { - acc.push_str(&format!("{x:x}")); + acc.push_str(&format!("{x:02x}")); acc }, ))), diff --git a/crates/iceberg/src/spec/values/tests.rs b/crates/iceberg/src/spec/values/tests.rs index 41238ed899..2cf930bb09 100644 --- a/crates/iceberg/src/spec/values/tests.rs +++ 
/// Checks the JSON hex-string form of a `binary` literal via `check_json_serde`.
#[test]
fn test_json_serde_binary() {
    // 0x00 and 0x05 are both below 0x10, so this input also exercises zero-padding of
    // single-digit bytes in addition to the plain round trip.
    let ty = Type::Primitive(PrimitiveType::Binary);
    let literal = Literal::Primitive(PrimitiveLiteral::Binary(vec![0x00, 0xff, 0x05]));
    check_json_serde("\"00ff05\"", literal, &ty);
}

/// Checks the JSON hex-string form of a `fixed(4)` literal via `check_json_serde`.
#[test]
fn test_json_serde_fixed() {
    let ty = Type::Primitive(PrimitiveType::Fixed(4));
    let literal = Literal::Primitive(PrimitiveLiteral::Binary(vec![0xde, 0xad, 0xbe, 0xef]));
    check_json_serde("\"deadbeef\"", literal, &ty);
}

/// Four decoded bytes must be rejected when the target type is `fixed(5)`.
#[test]
fn test_try_from_json_fixed_length_mismatch() {
    let ty = Type::Primitive(PrimitiveType::Fixed(5));
    let result = Literal::try_from_json(JsonValue::String("deadbeef".into()), &ty);
    assert!(result.is_err());
}

/// A string containing non-hex characters must be rejected for `binary`.
#[test]
fn test_try_from_json_binary_invalid_hex() {
    let ty = Type::Primitive(PrimitiveType::Binary);
    let result = Literal::try_from_json(JsonValue::String("zz".into()), &ty);
    assert!(result.is_err());
}