Skip to content

Commit dcca1d9

Browse files
committed
dependencies
1 parent 39b4249 commit dcca1d9

6 files changed

Lines changed: 456 additions & 115 deletions

File tree

poetry.lock

Lines changed: 357 additions & 84 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "rdf-sql-bulkloader"
3-
version = "0.1.0rc2"
3+
version = "0.0.0"
44
description = "rdf-sql-bulkloader"
55
authors = ["Chris Mungall <cjmungall@lbl.gov>"]
66
license = "BSD 3"
@@ -13,8 +13,8 @@ tox = "^3.25.1"
1313
click = "^8.1.3"
1414
importlib = "^1.0.4"
1515
lightrdf = "^0.2.1"
16-
curies = "^0.2.0"
17-
prefixmaps = "^0.1.2"
16+
curies = "*"
17+
prefixmaps = "*"
1818
pyoxigraph = "^0.3.6"
1919

2020
[tool.poetry.dev-dependencies]

src/rdf_sql_bulkloader/cli.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,17 @@ def main(verbose: int, quiet: bool):
3131

3232
@main.command()
3333
@click.option("--output", "-o", required=True)
34+
@click.option("--format",
35+
"-f")
3436
@click.option(
3537
"--force/--no-force", default=False, show_default=True, help="Recreates db if already present"
3638
)
39+
@click.option(
40+
"--append/--no-append", default=False, show_default=True, help="Append to existing db"
41+
)
42+
@click.option(
43+
"--create-tables/--no-create-tables", default=True, show_default=True, help="Adds statements table"
44+
)
3745
@click.option(
3846
"--rdftab-compatibility/--no-rdftab-compatibility",
3947
default=True,
@@ -42,21 +50,24 @@ def main(verbose: int, quiet: bool):
4250
)
4351
@click.option("--named-prefix-map", "-P", multiple=True, help="Names of prefixmaps, e.g. obo")
4452
@click.argument("files", nargs=-1)
45-
def load_sqlite(files, output, force: bool, rdftab_compatibility: bool, named_prefix_map: tuple):
53+
def load_sqlite(files, format, output, append: bool, force: bool, rdftab_compatibility: bool, named_prefix_map: tuple, **kwargs):
    """Bulkload one or more RDF files into a SQLite database.

    With --append the output database must already exist. Otherwise an
    existing output file is an error unless --force is given, in which case
    the file is deleted before loading.
    """
    # Remaining CLI options (e.g. create_tables) arrive in **kwargs and are
    # forwarded unchanged to SqliteBulkloader.bulkload.
    # NOTE(review): create_tables defaults to True, so --append against a
    # database that already contains the tables presumably needs
    # --no-create-tables as well -- confirm the intended default.
    output_path = Path(output)
    db_exists = output_path.exists()
    if append:
        # Appending only makes sense when there is a database to add to.
        if not db_exists:
            raise ValueError(f"Cannot append as {output_path} does not exist")
    elif db_exists:
        if not force:
            raise ValueError(f"Path exists {output_path}")
        # --force: start from scratch by removing the old database file.
        output_path.unlink()
    loader = SqliteBulkloader(
        output, named_prefix_maps=list(named_prefix_map) if named_prefix_map else None
    )
    loader.rdftab_compatibility = rdftab_compatibility
    logging.info("Loading %s", files)
    # All files go to the loader in a single call; `format` is passed as the
    # loader's mime_type argument.
    loader.bulkload(list(files), format, **kwargs)
6071

6172

6273
if __name__ == "__main__":

src/rdf_sql_bulkloader/loaders/bulkloader.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,18 @@
2828
);
2929
"""
3030

31+
RDFTAB_STATEMENT_DDL = """
32+
CREATE TABLE statements (
33+
stanza TEXT,
34+
subject TEXT,
35+
predicate TEXT,
36+
object TEXT,
37+
value TEXT,
38+
datatype TEXT,
39+
language TEXT
40+
);
41+
"""
42+
3143
PREFIX_DDL = """
3244
CREATE TABLE prefix (
3345
prefix TEXT,
@@ -64,6 +76,9 @@ class BulkLoader(ABC):
6476
converter: Converter = None
6577
index_statements: bool = False
6678
rdftab_compatibility: bool = True
79+
include_graph_name: bool = False
80+
graph_name_from_ontology: bool = False
81+
include_statement_id: bool = False
6782
use_shacl_namespaces: bool = True
6883
batch_size: int = field(default_factory=lambda: DEFAULT_CHUNK)
6984
_contract_uri_cache: Dict[Union[NamedNode, BlankNode], str] = field(default_factory=lambda: {})
@@ -111,6 +126,9 @@ def contract_uri(self, uri: Optional[NamedNode]) -> Optional[str]:
111126
else:
112127
return uri.value
113128

129+
def load_prefixes(self):
    """Stub for loading prefixes; this implementation always raises.

    :raises NotImplementedError: unconditionally.
    """
    raise NotImplementedError
131+
114132
def statements(self, path: Union[Path, str], mime_type=None) -> Iterator[STATEMENT]:
115133
"""Yields statement rows from an RDF file."""
116134
if mime_type is None:
@@ -148,4 +166,7 @@ def statements(self, path: Union[Path, str], mime_type=None) -> Iterator[STATEME
148166

149167
def ddl_statements(self) -> List[str]:
    """List the CREATE TABLE statements for the active schema flavour.

    The statement-table DDL is ``RDFTAB_STATEMENT_DDL`` when
    ``rdftab_compatibility`` is set and ``STATEMENT_DDL`` otherwise; the
    prefix-table DDL always follows it.
    """
    statement_ddl = RDFTAB_STATEMENT_DDL if self.rdftab_compatibility else STATEMENT_DDL
    return [statement_ddl, PREFIX_DDL]

src/rdf_sql_bulkloader/loaders/sqlite3_bulkloader.py

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,13 @@
33
import logging
44
import sqlite3
55
from dataclasses import dataclass
6-
from typing import Any, Iterable, List
6+
from typing import Any, Iterable, List, Union
77

88
from rdf_sql_bulkloader.loaders.bulkloader import DEFAULT_CHUNK, BulkLoader
99

10+
11+
logger = logging.getLogger(__name__)
12+
1013
COLS = ["subject", "predicate", "object", "value", "datatype", "language"]
1114

1215
RDFTAB_INSERT = """
@@ -45,7 +48,22 @@ class SqliteBulkloader(BulkLoader):
4548

4649
connection: Any = None
4750

48-
def bulkload(self, path: str, mime_type=None):
51+
def create_ddl(self):
    """
    Create the database tables and fill the prefix table.

    Executes every statement returned by ``self.ddl_statements()`` on
    ``self.connection``, then inserts one ``(prefix, base)`` row into the
    ``prefix`` table for each entry of ``self.prefix_map``. No ``commit()``
    is issued here.

    :return: None
    """
    con = self.connection
    for ddl_stmt in self.ddl_statements():
        con.execute(ddl_stmt)
    # Static SQL with bound parameters -- a plain string, no f-string needed.
    con.executemany("insert into prefix (prefix,base) values (?,?)", self.prefix_map.items())
61+
62+
def load_prefixes(self):
    """Load prefixes into the database; not implemented for this loader.

    :raises NotImplementedError: always.
    """
    raise NotImplementedError
65+
66+
def bulkload(self, paths: Union[str, List[str]], mime_type=None, create_tables=True):
    """
    Bulkload one or more RDF files into the SQLite database.

    Opens a connection to ``self.database_path`` and keeps it on
    ``self.connection`` (it is not closed here, so callers can query it
    afterwards). Optionally creates the tables, then inserts the statements
    of every path in batches of ``self.batch_size`` and commits once after
    all paths have been processed.

    :param paths: a single path, or a list/tuple of paths, to load
    :param mime_type: passed through to ``self.statements`` for every path
    :param create_tables: if true, run ``self.create_ddl()`` before loading
    :return: None
    """
    con = sqlite3.connect(self.database_path)
    self.connection = con
    if create_tables:
        self.create_ddl()
    # Accept a single path as well as a list/tuple of paths.
    if isinstance(paths, (list, tuple)):
        paths = list(paths)
    else:
        paths = [paths]
    # The INSERT statement depends only on the schema flavour, so build it once.
    compat = self.rdftab_compatibility
    colstr = ",".join(COLS)
    qs = ",".join("?" for _ in COLS)
    if compat:
        insert_sql = f"insert into statements({colstr},stanza) values ({qs},?)"
    else:
        insert_sql = f"insert into statement({colstr}) values ({qs})"
    for path in paths:
        logger.info("Loading %s into %s as %s...", path, self.database_path, mime_type)
        for chunk_it in chunk(self.statements(path, mime_type), self.batch_size):
            if compat:
                # The extra stanza column is filled with the row's first
                # element, i.e. the value bound to the subject column.
                tuples = [t + (t[0],) for t in chunk_it]
            else:
                tuples = list(chunk_it)
            if not tuples:
                # Nothing to insert (and no first row to log) for an empty batch.
                continue
            logger.info("Inserting %d statements; first: %s", len(tuples), tuples[0])
            con.executemany(insert_sql, tuples)
    con.commit()

tests/test_sqlite3_bulkloader.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,13 +95,13 @@
9595
]
9696

9797

98-
class TestSqlit3BulkLoader(unittest.TestCase):
98+
class TestSqlite3BulkLoader(unittest.TestCase):
9999
"""Test sqlite3."""
100100

101101
def test_bulkload(self):
102102
"""Tests bulkload into an in-memory database."""
103103
loader = SqliteBulkloader(database_path=":memory:")
104-
loader.rdftab_compatibility = True
104+
loader.rdftab_compatibility = False
105105
loader.bulkload(TEST_INPUT_OWL)
106106
con = loader.connection
107107
cur = con.cursor()
@@ -111,6 +111,13 @@ def test_bulkload(self):
111111
# print(f"S={s}")
112112
for case in CASES:
113113
self.assertIn(case, stmts)
114+
115+
def test_bulkload_compat(self):
116+
loader = SqliteBulkloader(database_path=":memory:")
117+
loader.rdftab_compatibility = True
118+
loader.bulkload(TEST_INPUT_OWL)
119+
con = loader.connection
120+
cur = con.cursor()
114121
cur.execute("select * from statements WHERE subject=:subject", {"subject": NUCLEUS})
115122
stmts = list(cur.fetchall())
116123
# for s in stmts:

0 commit comments

Comments
 (0)