Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
1971ef9
update
lonless9 Sep 30, 2025
f1c75a6
Merge remote-tracking branch 'origin/main' into iceberg-scan
lonless9 Sep 30, 2025
d3f4eed
update
lonless9 Sep 30, 2025
3b45d10
update
lonless9 Oct 9, 2025
1f5764e
Merge remote-tracking branch 'origin/main' into iceberg-scan
lonless9 Oct 9, 2025
fe6ae73
update
lonless9 Oct 9, 2025
e0800ca
update
lonless9 Oct 9, 2025
6cd4008
update
lonless9 Oct 9, 2025
919b7c7
update
lonless9 Oct 9, 2025
e9781c4
update
lonless9 Oct 10, 2025
364ae0a
Merge remote-tracking branch 'origin/main' into iceberg-scan
lonless9 Oct 10, 2025
cc9c217
update
lonless9 Oct 12, 2025
a6adbb3
catalog spec
lonless9 Oct 12, 2025
65eedc3
comments
lonless9 Oct 12, 2025
b7b1283
update
lonless9 Oct 13, 2025
68d5d22
update
lonless9 Oct 13, 2025
a3c2920
Merge remote-tracking branch 'origin/main' into iceberg-scan
lonless9 Oct 13, 2025
c4797a3
pruning
lonless9 Oct 13, 2025
d6450f1
improve stat
lonless9 Oct 13, 2025
8b0005b
manifest level
lonless9 Oct 13, 2025
5dbf113
update
lonless9 Oct 13, 2025
22b2e12
test
lonless9 Oct 13, 2025
55acd5a
log level
lonless9 Oct 14, 2025
498c233
rewrite
lonless9 Oct 14, 2025
d4e0f5b
pushdown
lonless9 Oct 14, 2025
6c3dfcc
most todos
lonless9 Oct 14, 2025
962fccb
test update
lonless9 Oct 14, 2025
65d39ae
update
lonless9 Oct 14, 2025
ee1aa19
basic options
lonless9 Oct 15, 2025
c47f605
update
lonless9 Oct 15, 2025
8550f81
fmt
lonless9 Oct 15, 2025
2ee47cd
update
lonless9 Oct 16, 2025
12d4b58
Merge remote-tracking branch 'origin/main' into iceberg-scan
lonless9 Oct 16, 2025
64de0d1
remove pyiceberg-core
lonless9 Oct 16, 2025
a0bab5c
pin pydantic
lonless9 Oct 16, 2025
49755f2
add iceberg-core
lonless9 Oct 16, 2025
2e63cf9
address comments
lonless9 Oct 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ bytes = "1.10.1"
indexmap = "2.11.4"
pin-project-lite = "0.2.16"
ordered-float = { version = "5.1.0", features = ["serde"] }
apache-avro = { version = "0.20.0" }

######
# The versions of the following dependencies are managed manually.
Expand Down
1 change: 1 addition & 0 deletions crates/sail-data-source/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ workspace = true
sail-common = { path = "../sail-common" }
sail-common-datafusion = { path = "../sail-common-datafusion" }
sail-delta-lake = { path = "../sail-delta-lake" }
sail-iceberg = { path = "../sail-iceberg" }

async-trait = { workspace = true }
serde = { workspace = true }
Expand Down
45 changes: 45 additions & 0 deletions crates/sail-data-source/src/formats/iceberg.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
use std::sync::Arc;

use async_trait::async_trait;
use datafusion::catalog::{Session, TableProvider};
use datafusion::common::Result;
use datafusion::physical_plan::ExecutionPlan;
use sail_common_datafusion::datasource::{SinkInfo, SourceInfo, TableFormat};
use sail_iceberg::IcebergTableFormat;

/// Iceberg table format for the data source layer.
///
/// This is a thin wrapper around [`IcebergTableFormat`] from `sail-iceberg`:
/// its [`TableFormat`] implementation forwards every call to the wrapped value.
#[derive(Debug)]
pub struct IcebergDataSourceFormat {
/// The `sail-iceberg` format that all [`TableFormat`] calls are delegated to.
inner: IcebergTableFormat,
}

impl Default for IcebergDataSourceFormat {
    /// Creates the wrapper around an [`IcebergTableFormat`] value.
    fn default() -> Self {
        let inner = IcebergTableFormat;
        Self { inner }
    }
}

#[async_trait]
impl TableFormat for IcebergDataSourceFormat {
    /// Returns the format name reported by the wrapped format.
    fn name(&self) -> &str {
        let delegate = &self.inner;
        delegate.name()
    }

    /// Creates a table provider for `info` by forwarding the request,
    /// unchanged, to the wrapped format.
    async fn create_provider(
        &self,
        ctx: &dyn Session,
        info: SourceInfo,
    ) -> Result<Arc<dyn TableProvider>> {
        let delegate = &self.inner;
        delegate.create_provider(ctx, info).await
    }

    /// Creates a writer plan for `info` by forwarding the request,
    /// unchanged, to the wrapped format.
    async fn create_writer(
        &self,
        ctx: &dyn Session,
        info: SinkInfo,
    ) -> Result<Arc<dyn ExecutionPlan>> {
        let delegate = &self.inner;
        delegate.create_writer(ctx, info).await
    }
}
1 change: 1 addition & 0 deletions crates/sail-data-source/src/formats/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub mod binary;
pub mod console;
pub mod csv;
pub mod delta;
pub mod iceberg;
pub mod json;
pub mod listing;
pub mod parquet;
Expand Down
32 changes: 32 additions & 0 deletions crates/sail-data-source/src/options/data/iceberg_read.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Options for reading from an Apache Iceberg table.

- key: use_ref
aliases:
- ref
- branch
- tag
- iceberg.ref
description: |
Select a snapshot reference (tag or branch) to time-travel when reading.
If unset, the current snapshot is used.
supported: true
rust_type: String

- key: snapshot_id
aliases:
- snapshot-id
- snapshotId
description: |
Select a specific snapshot id to time-travel when reading.
supported: true
rust_type: i64
rust_deserialize_with: crate::options::serde::deserialize_i64

- key: timestamp_as_of
aliases:
- timestampAsOf
description: |
Select the snapshot as of the given timestamp. Accepts RFC 3339 or 'yyyy-MM-dd HH:mm:ss.SSS' format.
supported: true
rust_type: String

22 changes: 22 additions & 0 deletions crates/sail-data-source/src/options/data/iceberg_write.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Options for writing to an Apache Iceberg table.

- key: overwrite_schema
aliases:
- overwriteSchema
description: |
If `true`, allows overwriting the schema of the table when using overwrite mode.
default: "false"
supported: true
rust_type: bool
rust_deserialize_with: crate::options::serde::deserialize_bool

- key: merge_schema
aliases:
- mergeSchema
description: |
If `true`, allows automatic schema merging during an append or overwrite operation.
default: "false"
supported: true
rust_type: bool
rust_deserialize_with: crate::options::serde::deserialize_bool

2 changes: 2 additions & 0 deletions crates/sail-data-source/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::formats::binary::BinaryTableFormat;
use crate::formats::console::ConsoleTableFormat;
use crate::formats::csv::CsvTableFormat;
use crate::formats::delta::DeltaTableFormat;
use crate::formats::iceberg::IcebergDataSourceFormat;
use crate::formats::json::JsonTableFormat;
use crate::formats::parquet::ParquetTableFormat;
use crate::formats::rate::RateTableFormat;
Expand Down Expand Up @@ -42,6 +43,7 @@ impl TableFormatRegistry {
registry.register_format(Arc::new(BinaryTableFormat::default()));
registry.register_format(Arc::new(CsvTableFormat::default()));
registry.register_format(Arc::new(DeltaTableFormat));
registry.register_format(Arc::new(IcebergDataSourceFormat::default()));
registry.register_format(Arc::new(JsonTableFormat::default()));
registry.register_format(Arc::new(ParquetTableFormat::default()));
registry.register_format(Arc::new(TextTableFormat::default()));
Expand Down
39 changes: 39 additions & 0 deletions crates/sail-iceberg/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
[package]
name = "sail-iceberg"
version = { workspace = true }
edition = { workspace = true }

[dependencies]
sail-common-datafusion = { path = "../sail-common-datafusion" }

# DataFusion dependencies
datafusion = { workspace = true }
datafusion-common = { workspace = true }

# Arrow dependencies
arrow-schema = { workspace = true }

# Essential utilities
async-trait = { workspace = true }
object_store = { workspace = true }
chrono = { workspace = true }
serde_json = { workspace = true }
url = { workspace = true }
futures = { workspace = true }
serde = { workspace = true, features = ["derive"] }
uuid = { workspace = true }
# parquet = { workspace = true }
# bytes = { workspace = true }
# indexmap = { workspace = true }
log = { workspace = true }
# itertools = { workspace = true }
percent-encoding = { workspace = true }
once_cell = { workspace = true }
ordered-float = { workspace = true }
apache-avro = { workspace = true }
num-bigint = { workspace = true }
num-traits = { workspace = true }
# base64 = { workspace = true }

[lints]
workspace = true
Loading