Skip to content

Commit 1c64499

Browse files
committed
fix(llama-index): prevent Cypher injection via backtick-escaped identifiers
- add _cypher_ident(): backtick-escapes labels, rel-types, property keys
- get(): look up nodes by n.id (string) instead of get_node() (expects int)
- get(): escape property keys in WHERE clause
- get_triplets(): escape relation_names; raise NotImplementedError for properties/ids
- upsert_relations(): escape rel.label
- get_rel_map(): implement ignore_rels Python-side filter
- delete(): raise NotImplementedError for relation_names/properties

fix(client): parse host:port regardless of default port value
fix(build): portable sed -i.bak instead of macOS-only sed -i ''
fix(build): sync .PHONY — install-uv to install-pip
fix(release): remove unused outputs from release-please.yml
fix(release): remove trailing blank line in release.yml
feat(coordinode): import __version__ from hatch-vcs _version.py
test(integration): split compound assert for clearer failure messages
1 parent bc156c3 commit 1c64499

8 files changed

Lines changed: 54 additions & 24 deletions

File tree

.github/workflows/release-please.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ permissions:
1212
jobs:
1313
release-please:
1414
runs-on: ubuntu-latest
15-
outputs:
16-
release_created: ${{ steps.rp.outputs.release_created }}
17-
tag_name: ${{ steps.rp.outputs.tag_name }}
1815
steps:
1916
- uses: googleapis/release-please-action@v4
2017
id: rp

.github/workflows/release.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,3 @@ jobs:
9595
prerelease: ${{ contains(github.ref_name, 'a') || contains(github.ref_name, 'b') || contains(github.ref_name, 'rc') }}
9696
generate_release_notes: true
9797
files: dist/*
98-

Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
.PHONY: proto proto-check install install-uv test test-unit test-integration lint clean
1+
.PHONY: proto proto-check install install-pip test test-unit test-integration lint clean
22

33
PROTO_SRC := proto
44
PROTO_OUT := coordinode/_proto
@@ -16,8 +16,11 @@ proto:
1616
@# Add __init__.py to every generated package directory
1717
@find $(PROTO_OUT) -type d -exec touch {}/__init__.py \;
1818
@# Fix absolute imports in all generated pb2 files (grpc_tools generates absolute paths)
19-
@find $(PROTO_OUT) -name '*.py' -exec sed -i '' \
19+
@# sed -i.bak is portable: macOS needs empty-string backup arg, GNU sed uses -i alone;
20+
@# using .bak suffix works on both, then we clean up the backup files.
21+
@find $(PROTO_OUT) -name '*.py' -exec sed -i.bak \
2022
's/from coordinode\./from coordinode._proto.coordinode./g' {} \;
23+
@find $(PROTO_OUT) -name '*.py.bak' -delete
2124
@echo "==> Proto generation complete: $(PROTO_OUT)/"
2225

2326
proto-check:

coordinode/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,10 @@
2626
VectorResult,
2727
)
2828

29-
__version__ = "0.3.0a1"
29+
try:
30+
from coordinode._version import __version__
31+
except ImportError:
32+
__version__ = "0.0.0" # fallback for editable installs without hatch-vcs
3033
__all__ = [
3134
"CoordinodeClient",
3235
"AsyncCoordinodeClient",

coordinode/_types.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ def to_property_value(py_val: PyValue) -> Any:
3535
pv.bytes_value = py_val
3636
elif isinstance(py_val, list | tuple):
3737
# Homogeneous float list → Vector; mixed/str list → PropertyList
38+
# isinstance() with X|Y union syntax is valid from Python 3.10+ (PEP 604).
39+
# This package requires Python >=3.11, so no tuple-of-types workaround needed.
3840
if py_val and all(isinstance(v, int | float) for v in py_val):
3941
vec = Vector(values=[float(v) for v in py_val])
4042
pv.vector_value.CopyFrom(vec)

coordinode/client.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,9 +102,13 @@ def __init__(
102102
timeout: float = 30.0,
103103
) -> None:
104104
# Support "host:port" as a single string (common gRPC convention).
105-
if ":" in host and port == 7080:
105+
# Parse whenever the last colon-delimited segment is numeric, regardless
106+
# of default port. IPv6 bracket notation ([::1]:7080) is handled correctly
107+
# by rsplit(":", 1): "[::1]" + "7080".
108+
if ":" in host:
106109
_h, _p = host.rsplit(":", 1)
107-
host, port = _h, int(_p)
110+
if _p.isdigit():
111+
host, port = _h, int(_p)
108112
self._host = host
109113
self._port = port
110114
self._tls = tls

llama-index-coordinode/llama_index/graph_stores/coordinode/base.py

Lines changed: 35 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@
1515
from llama_index.core.vector_stores.types import VectorStoreQuery
1616

1717

18+
def _cypher_ident(value: str) -> str:
19+
"""Backtick-escape a Cypher identifier (label, rel-type, property key).
20+
21+
Doubles any embedded backticks per the OpenCypher spec so that arbitrary
22+
strings can be used safely as identifiers without Cypher injection.
23+
"""
24+
return f"`{value.replace('`', '``')}`"
25+
26+
1827
class CoordinodePropertyGraphStore(PropertyGraphStore):
1928
"""LlamaIndex ``PropertyGraphStore`` backed by CoordiNode.
2029
@@ -60,17 +69,21 @@ def get(
6069
nodes: list[LabelledNode] = []
6170

6271
if ids:
63-
for node_id in ids:
64-
result = self._client.get_node(node_id)
65-
if result is not None:
66-
nodes.append(_node_result_to_labelled(node_id, result))
72+
# Query by the stored n.id property (string adapter ID), not by the
73+
# graph-internal integer node ID that get_node() expects.
74+
cypher = "MATCH (n) WHERE n.id IN $ids RETURN n, n.id AS _nid LIMIT 1000"
75+
result = self._client.cypher(cypher, params={"ids": ids})
76+
for row in result:
77+
node_data = row.get("n", {})
78+
node_id = str(row.get("_nid", ""))
79+
nodes.append(_node_result_to_labelled(node_id, node_data))
6780
elif properties:
68-
where_clauses = " AND ".join(f"n.{k} = ${k}" for k in properties)
69-
cypher = f"MATCH (n) WHERE {where_clauses} RETURN n, id(n) AS _id LIMIT 1000"
81+
where_clauses = " AND ".join(f"n.{_cypher_ident(k)} = ${k}" for k in properties)
82+
cypher = f"MATCH (n) WHERE {where_clauses} RETURN n, n.id AS _nid LIMIT 1000"
7083
result = self._client.cypher(cypher, params=properties)
7184
for row in result:
7285
node_data = row.get("n", {})
73-
node_id = str(row.get("_id", ""))
86+
node_id = str(row.get("_nid", ""))
7487
nodes.append(_node_result_to_labelled(node_id, node_data))
7588

7689
return nodes
@@ -86,12 +99,14 @@ def get_triplets(
8699
conditions: list[str] = []
87100
params: dict[str, Any] = {}
88101

102+
if properties or ids:
103+
raise NotImplementedError("get_triplets() does not yet support filtering by properties or ids")
89104
if entity_names:
90105
conditions.append("(n.name IN $entity_names OR m.name IN $entity_names)")
91106
params["entity_names"] = entity_names
92107
if relation_names:
93-
rel_filter = "|".join(relation_names)
94-
# Inline into pattern — CoordiNode supports dynamic type lists
108+
# Escape each type name to prevent Cypher injection
109+
rel_filter = "|".join(_cypher_ident(t) for t in relation_names)
95110
rel_pattern = f"[r:{rel_filter}]"
96111
else:
97112
rel_pattern = "[r]"
@@ -129,17 +144,16 @@ def get_rel_map(
129144
if not graph_nodes:
130145
return []
131146

132-
ids = [n.id for n in graph_nodes]
133-
# ignore_rels: OpenCypher doesn't support dynamic type exclusion in patterns;
134-
# would require WHERE NOT type(r) IN $ignore_rels — added when needed.
147+
node_ids = [n.id for n in graph_nodes]
148+
ignored = set(ignore_rels) if ignore_rels else set()
135149

136150
cypher = (
137151
f"MATCH (n)-[r*1..{depth}]->(m) "
138152
f"WHERE id(n) IN $ids "
139153
f"RETURN n, r, m, id(n) AS _src_id, id(m) AS _dst_id "
140154
f"LIMIT {limit}"
141155
)
142-
result = self._client.cypher(cypher, params={"ids": ids})
156+
result = self._client.cypher(cypher, params={"ids": node_ids})
143157

144158
triplets: list[list[LabelledNode]] = []
145159
for row in result:
@@ -149,6 +163,10 @@ def get_rel_map(
149163
dst_id = str(row.get("_dst_id", ""))
150164
# Variable-length path [r*1..N] returns a list of relationship dicts.
151165
rels = row.get("r", [])
166+
# Skip paths that contain any ignored relationship type.
167+
if ignored and isinstance(rels, list):
168+
if any(isinstance(r, dict) and r.get("type") in ignored for r in rels):
169+
continue
152170
if isinstance(rels, list) and rels:
153171
first_rel = rels[0]
154172
rel_label = first_rel.get("type", "RELATED") if isinstance(first_rel, dict) else str(first_rel)
@@ -174,7 +192,8 @@ def upsert_relations(self, relations: list[Relation]) -> None:
174192
for rel in relations:
175193
props = rel.properties or {}
176194
cypher = (
177-
f"MATCH (src {{id: $src_id}}), (dst {{id: $dst_id}}) MERGE (src)-[r:{rel.label}]->(dst) SET r += $props"
195+
f"MATCH (src {{id: $src_id}}), (dst {{id: $dst_id}}) "
196+
f"MERGE (src)-[r:{_cypher_ident(rel.label)}]->(dst) SET r += $props"
178197
)
179198
self._client.cypher(
180199
cypher,
@@ -193,6 +212,8 @@ def delete(
193212
ids: list[str] | None = None,
194213
) -> None:
195214
"""Delete nodes and/or relations matching given criteria."""
215+
if relation_names or properties:
216+
raise NotImplementedError("delete() does not yet support filtering by relation_names or properties")
196217
if ids:
197218
cypher = "MATCH (n) WHERE id(n) IN $ids DETACH DELETE n"
198219
self._client.cypher(cypher, params={"ids": ids})

tests/integration/test_basic.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ def test_create_and_get_node(client):
4949
"MATCH (n:IntegrationTest {name: $name}) RETURN n.name AS name",
5050
params={"name": "sdk-test-node"},
5151
)
52-
assert found and found[0]["name"] == "sdk-test-node"
52+
assert found, "MATCH returned no rows"
53+
assert found[0]["name"] == "sdk-test-node"
5354

5455
# Clean up
5556
client.cypher(

0 commit comments

Comments
 (0)