Skip to content

Commit a9ccd79

Browse files
committed
cli
1 parent e21a193 commit a9ccd79

6 files changed

Lines changed: 74 additions & 17 deletions

File tree

poetry.lock

Lines changed: 22 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ click = "^8.1.3"
1313
importlib = "^1.0.4"
1414
lightrdf = "^0.2.1"
1515
curies = "^0.2.0"
16+
prefixmaps = "^0.1.2"
1617

1718
[tool.poetry.dev-dependencies]
1819
pytest = "^7.1.2"
@@ -22,7 +23,7 @@ sphinx-autodoc-typehints = "^1.19.2"
2223
sphinx-click = "^4.3.0"
2324

2425
[tool.poetry.scripts]
25-
rdf-sql-bulkloader = "rdf-sql-bulkloader.cli:main"
26+
rdf-sql-bulkloader = "rdf_sql_bulkloader.cli:main"
2627

2728
[tool.poetry.extras]
2829
docs = [

src/rdf_sql_bulkloader/cli.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
"""Command line interface for rdf-sql-bulkloader."""
2+
from pathlib import Path
3+
24
import click
35
import logging
46

5-
from rdf_sql_bulkloader import __version__
6-
7-
__all__ = [
8-
"main",
9-
]
7+
from rdf_sql_bulkloader import __version__, SqliteBulkloader
108

119
logger = logging.getLogger(__name__)
1210

@@ -29,10 +27,29 @@ def main(verbose: int, quiet: bool):
2927
if quiet:
3028
logger.setLevel(level=logging.ERROR)
3129

30+
3231
@main.command()
@click.option("--output", "-o", required=True)
@click.option("--force/--no-force",
              default=False,
              show_default=True,
              help="Recreates db if already present")
@click.argument("files", nargs=-1)
def load_sqlite(files, output, force: bool):
    """Bulk-load one or more RDF FILES into the SQLite database at --output.

    If the output path already exists it is deleted when --force is given;
    otherwise the command fails without touching it.
    """
    # NOTE: click renders this docstring as the command's --help text, so it
    # must describe the command itself rather than implementation details.
    output_path = Path(output)
    if output_path.exists():
        if not force:
            # Refuse to write into an existing file unless explicitly asked to.
            raise ValueError(f"Path exists {output_path}")
        output_path.unlink()
    loader = SqliteBulkloader(output)
    if len(files) > 1:
        # Use the module logger (not the root ``logging`` module) so the level
        # configured by ``main`` (e.g. --quiet) applies to this message too.
        logger.warning("Blank nodes may be shared TODO FIX ME")
    for file in files:
        # Echo each input path as progress output before loading it.
        click.echo(file)
        loader.bulkload(file)
52+
3653

3754

3855

src/rdf_sql_bulkloader/loaders/bulkloader.py

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,12 @@
33
from collections import defaultdict
44
from dataclasses import dataclass
55
from pathlib import Path
6-
from typing import Union, Tuple, Iterator, Mapping, Optional
6+
from typing import Union, Tuple, Iterator, Mapping, Optional, List
77

88
import curies
99
import lightrdf
1010
from curies import Converter
11+
from prefixmaps.io.parser import load_multi_context
1112

1213
re_untyped_literal = re.compile(r'^"(.*)"$')
1314
re_typed_literal = re.compile(r'^"(.*)"\^\^<([\S^"]+)>$')
@@ -72,23 +73,42 @@ def _parse_literal(o: str) -> Tuple[OBJECT_VALUE, OBJECT_DATATYPE, OBJECT_LANG]:
7273
def _parse_literal_as_value(o: str) -> str:
    """Return only the value component of the literal ``o``.

    Delegates to ``_parse_literal`` and keeps the first element of the
    (value, datatype, lang) tuple it returns.
    """
    parsed = _parse_literal(o)
    return parsed[0]
7475

76+
77+
7578
@dataclass
7679
class BulkLoader(ABC):
7780
"""
7881
Base class for all bulk loaders
7982
"""
8083
path: str
84+
named_prefix_maps: List[str] = None
8185
prefix_map: PREFIX_MAP = None
8286
converter: Converter = None
8387
index_statements = False
8488

8589
def __post_init__(self):
    """Resolve the prefix map after dataclass initialisation, then build the converter.

    An explicitly supplied ``prefix_map`` is used as-is. Otherwise the map is
    loaded from ``named_prefix_maps``, which falls back to ``["merged"]`` when
    it is None; an empty list leaves ``prefix_map`` unset.
    """
    if self.prefix_map is None:
        if self.named_prefix_maps is not None:
            map_names = self.named_prefix_maps
        else:
            map_names = ["merged"]
        # An explicitly empty list means "load no named prefix maps".
        if len(map_names) > 0:
            self.prefix_map = load_multi_context(map_names).as_dict()
    self._set_converter()
98+
99+
def _set_converter(self):
    """Create ``self.converter`` from ``self.prefix_map``.

    Does nothing when the prefix map is None or empty, leaving any existing
    converter untouched.
    """
    if not self.prefix_map:
        return
    self.converter = Converter.from_prefix_map(self.prefix_map)
88102

89103
def bulkload(self, path: str):
    """Load the RDF file at ``path`` into the target store.

    This base implementation is a placeholder that concrete loaders override.

    :param path: location of the file to load
    :raises NotImplementedError: always, in the base class
    """
    # ``NotImplemented`` is a comparison sentinel, not an exception; raising it
    # produces a confusing TypeError. ``NotImplementedError`` is the correct
    # signal for an unimplemented method.
    raise NotImplementedError
91105

106+
def _parse_node(self, o: str) -> str:
    """Convert a subject/object node string into its stored form.

    Nodes matching ``re_blank_node`` are returned with a ``_:`` prefix; every
    other node is passed through ``contract_uri``.
    """
    is_blank_node = re_blank_node.match(o)
    return f"_:{o}" if is_blank_node else self.contract_uri(o)
111+
92112
def contract_uri(self, uri: Optional[URI]) -> Optional[str]:
93113
if uri is None:
94114
return None
@@ -139,7 +159,7 @@ def statements(self, path: Union[Path, str]) -> Iterator[STATEMENT]:
139159

140160
# this code could be reduced if https://github.com/ozekik/lightrdf/issues/12 is implemented
141161
for t in doc.search_triples(None, None, None):
142-
s = self.contract_uri(t[0])
162+
s = self._parse_node(t[0])
143163
p = self.contract_uri(t[1])
144164
o = t[2]
145165
o_uri = None
@@ -149,10 +169,7 @@ def statements(self, path: Union[Path, str]) -> Iterator[STATEMENT]:
149169
o_value, o_datatype, o_lang = _parse_literal(o)
150170
else:
151171
o_value = None
152-
if re_blank_node.match(o):
153-
o_uri = f"_:{o}"
154-
else:
155-
o_uri = self.contract_uri(o)
172+
o_uri = self._parse_node(o)
156173
yield s, p, o_uri, o_value, o_datatype, o_lang
157174

158175
def ddl(self) -> str:

src/rdf_sql_bulkloader/loaders/sqlite3_bulkloader.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,4 @@ def bulkload(self, path: str):
2222
colstr = ",".join(COLS)
2323
qs = ",".join(["?" for _ in COLS])
2424
con.executemany(f"insert into statement({colstr}) values ({qs})", tuples)
25+
con.commit()

tests/test_sqlite3_bulkloader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
CASES =[
1010
(None, 'GO:0005634', 'RO:0002161', 'NCBITaxon:2', None, None, None, None),
11-
(None, 'GO:0005634', 'IAO:0000115', None, "A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent.", 'http://www.w3.org/2001/XMLSchema#string', None, None)
12-
11+
(None, 'GO:0005634', 'IAO:0000115', None, "A membrane-bounded organelle of eukaryotic cells in which chromosomes are housed and replicated. In most cells, the nucleus contains all of the cell's chromosomes except the organellar chromosomes, and is the site of RNA synthesis and processing. In some species, or in specialized cell types, RNA metabolism or DNA replication may be absent.", 'http://www.w3.org/2001/XMLSchema#string', None, None),
12+
(None, 'GO:0005634', 'rdf:type', 'owl:Class', None, None, None, None),
1313
]
1414

1515
class TestSqlit3BulkLoader(unittest.TestCase):

0 commit comments

Comments
 (0)