Skip to content

Commit dba8d18

Browse files
committed
add file systems config for duckdb
1 parent 44df249 commit dba8d18

2 files changed

Lines changed: 15 additions & 18 deletions

File tree

sqlmesh/core/config/connection.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ class BaseDuckDBConnectionConfig(ConnectionConfig):
270270
extensions: A list of autoloadable extensions to load.
271271
connector_config: A dictionary of configuration to pass into the duckdb connector.
272272
secrets: A list of dictionaries used to generate DuckDB secrets for authenticating with external services (e.g. S3).
273+
file_systems: A list of dictionaries used to register `fsspec` filesystems to the DuckDB cursor.
273274
concurrent_tasks: The maximum number of tasks that can use this connection concurrently.
274275
register_comments: Whether or not to register model comments with the SQL engine.
275276
pre_ping: Whether or not to pre-ping the connection before starting a new transaction to ensure it is still alive.
@@ -281,6 +282,7 @@ class BaseDuckDBConnectionConfig(ConnectionConfig):
281282
extensions: t.List[t.Union[str, t.Dict[str, t.Any]]] = []
282283
connector_config: t.Dict[str, t.Any] = {}
283284
secrets: t.List[t.Dict[str, t.Any]] = []
285+
file_systems: t.List[t.Dict[str, t.Any]] = []
284286

285287
concurrent_tasks: int = 1
286288
register_comments: bool = True
@@ -375,6 +377,15 @@ def init(cursor: duckdb.DuckDBPyConnection) -> None:
375377
except Exception as e:
376378
raise ConfigError(f"Failed to create secret: {e}")
377379

380+
if self.file_systems:
381+
from fsspec import filesystem # type: ignore
382+
383+
for file_system in self.file_systems:
384+
protocol = file_system.pop("protocol")
385+
storage_options = file_system.pop("storage_options")
386+
fs = filesystem(protocol, **storage_options)
387+
cursor.register_filesystem(fs)
388+
378389
for i, (alias, path_options) in enumerate(
379390
(getattr(self, "catalogs", None) or {}).items()
380391
):
@@ -386,24 +397,6 @@ def init(cursor: duckdb.DuckDBPyConnection) -> None:
386397
try:
387398
if isinstance(path_options, DuckDBAttachOptions):
388399
query = path_options.to_sql(alias)
389-
390-
if path_options.data_path.split(":")[0] == "abfs":
391-
392-
if path_options.azure_account_name is None or path_options.azure_account_host is None:
393-
raise ValueError("azure_account_name and azure_account_host must be set when using abfs protocol")
394-
395-
396-
storage_options = {
397-
"account_name": path_options.azure_account_name,
398-
"account_host": path_options.azure_account_host,
399-
"anon":False,
400-
}
401-
from fsspec import filesystem
402-
403-
fs = filesystem("abfs", **storage_options)
404-
cursor.register_filesystem(fs)
405-
cursor.commit()
406-
407400
else:
408401
query = f"ATTACH IF NOT EXISTS '{path_options}'"
409402
if not path_options.startswith("md:"):

sqlmesh/dbt/target.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ class DuckDbConfig(TargetConfig):
138138
extensions: A list of autoloadable extensions to load.
139139
settings: A dictionary of settings to pass into the duckdb connector.
140140
secrets: A list of secrets to pass to the secret manager in the duckdb connector.
141+
file_systems: A list of `fsspec` filesystems to register in the duckdb connection.
141142
"""
142143

143144
type: t.Literal["duckdb"] = "duckdb"
@@ -147,6 +148,7 @@ class DuckDbConfig(TargetConfig):
147148
extensions: t.Optional[t.List[str]] = None
148149
settings: t.Optional[t.Dict[str, t.Any]] = None
149150
secrets: t.Optional[t.List[t.Dict[str, t.Any]]] = None
151+
file_systems: t.Optional[t.List[t.Dict[str, t.Any]]] = None
150152

151153
@model_validator(mode="before")
152154
def validate_authentication(cls, data: t.Any) -> t.Any:
@@ -182,6 +184,8 @@ def to_sqlmesh(self, **kwargs: t.Any) -> ConnectionConfig:
182184
kwargs["connector_config"] = self.settings
183185
if self.secrets is not None:
184186
kwargs["secrets"] = self.secrets
187+
if self.file_systems is not None:
188+
kwargs["file_systems"] = self.file_systems
185189
return DuckDBConnectionConfig(
186190
database=self.path,
187191
concurrent_tasks=1,

0 commit comments

Comments
 (0)