Skip to content

Commit 55c155f

Browse files
committed
Address corner case when SQLite columns have no declared type and improve integer support
1 parent d30fb81 commit 55c155f

2 files changed

Lines changed: 42 additions & 14 deletions

File tree

splitgraph/ingestion/sqlite/__init__.py

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,17 @@
55
import sqlite3
66
import tempfile
77
from contextlib import contextmanager
8-
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple, Union
8+
from typing import (
9+
TYPE_CHECKING,
10+
Any,
11+
Dict,
12+
Generator,
13+
List,
14+
Optional,
15+
Set,
16+
Tuple,
17+
Union,
18+
)
919

1020
import requests
1121
from psycopg2.sql import SQL, Identifier
@@ -102,7 +112,8 @@ def sqlite_to_postgres_type(raw_sqlite_type: str) -> str:
102112
# from: https://www.sqlite.org/datatype3.html#determination_of_column_affinity
103113
# If the declared type contains the string "INT" then it is assigned INTEGER affinity.
104114
if "INT" in sqlite_type:
105-
return "INTEGER"
115+
# SQLite integers are signed and up to 64 bits wide, which can overflow Postgres' 32-bit INTEGER, so map to BIGINT; see: https://www.sqlite.org/datatype3.html#storage_classes_and_datatypes
116+
return "BIGINT"
106117
# If the declared type of the column contains any of the strings "CHAR", "CLOB", or "TEXT" then that column has TEXT affinity. Notice that the type VARCHAR contains the string "CHAR" and is thus assigned TEXT affinity.
107118
if "CHAR" in sqlite_type or "CLOB" in sqlite_type or "TEXT" in sqlite_type:
108119
return "TEXT"
@@ -118,6 +129,7 @@ def sqlite_to_postgres_type(raw_sqlite_type: str) -> str:
118129

119130
def sqlite_connection_to_introspection_result(con: sqlite3.Connection) -> IntrospectionResult:
120131
schema = IntrospectionResult({})
132+
tables_with_untyped_columns: Set[str] = set()
121133
for (
122134
table_name,
123135
column_id,
@@ -127,12 +139,21 @@ def sqlite_connection_to_introspection_result(con: sqlite3.Connection) -> Intros
127139
_default_value,
128140
pk,
129141
) in query_connection(con, LIST_TABLES_QUERY):
130-
table = schema.get(table_name, ([], TableParams({})))
131-
assert isinstance(table, tuple)
132-
table[0].append(
133-
TableColumn(column_id + 1, column_name, sqlite_to_postgres_type(column_type), pk != 0)
134-
)
135-
schema[table_name] = table
142+
if column_type == "":
143+
tables_with_untyped_columns.add(table_name)
144+
else:
145+
table = schema.get(table_name, ([], TableParams({})))
146+
assert isinstance(table, tuple)
147+
table[0].append(
148+
TableColumn(
149+
column_id + 1, column_name, sqlite_to_postgres_type(column_type), pk != 0
150+
)
151+
)
152+
schema[table_name] = table
153+
# Drop every table that has at least one column with no declared type.
154+
for t in tables_with_untyped_columns:
155+
if t in schema:
156+
del schema[t]
136157
return schema
137158

138159

@@ -245,11 +266,18 @@ def _batched_copy(
245266
insert_table_contents = (
246267
table_contents if len(primary_keys) > 0 else [row[1:] for row in table_contents]
247268
)
248-
self.engine.run_sql_batch(
249-
SQL("INSERT INTO {0}.{1} ").format(Identifier(schema), Identifier(table_name))
250-
+ SQL(" VALUES (" + ",".join(itertools.repeat("%s", len(schema_spec))) + ")"),
251-
insert_table_contents,
252-
) # nosec
269+
try:
270+
self.engine.run_sql_batch(
271+
SQL("INSERT INTO {0}.{1} ").format(Identifier(schema), Identifier(table_name))
272+
+ SQL(" VALUES (" + ",".join(itertools.repeat("%s", len(schema_spec))) + ")"),
273+
insert_table_contents,
274+
) # nosec
275+
except Exception as e:
276+
print(
277+
"Received exception %s running query '%s' with parameters %s"
278+
% (str(e), query, str(parameters))
279+
)
280+
raise e
253281
return total_row_count
254282

255283
def _load(self, schema: str, tables: Optional[TableInfo] = None):

test/splitgraph/ingestion/test_sqlite.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33

44
def test_type_mapping():
5-
assert sqlite_to_postgres_type("INT") == "INTEGER"
5+
assert sqlite_to_postgres_type("INT") == "BIGINT"
66
assert sqlite_to_postgres_type("TEXT") == "TEXT"
77
assert sqlite_to_postgres_type("text") == "TEXT"
88
assert sqlite_to_postgres_type("varchar(255)") == "VARCHAR(255)"

0 commit comments

Comments
 (0)