Skip to content

Commit 8953ce5

Browse files
committed
refactor: extract helpers, fix hash encoding, reduce cognitive complexity
- graph.py: extract _upsert_node(), _create_edge(), _link_document_to_entities() from add_graph_documents() — reduces cognitive complexity from 23 to ~8
- graph.py: _stable_document_id() now uses json.dumps(sort_keys=True) instead of string concatenation with "|"/"=" — fixes delimiter ambiguity for nested metadata
- base.py: _parse_edge_types_from_schema() refactored to use iter() with two for-loops instead of a boolean flag — reduces complexity from 16 to 14
- base.py: get_triplets() docstring documents that relation_names is required
1 parent 64a2877 commit 8953ce5

2 files changed

Lines changed: 89 additions & 64 deletions

File tree

  • langchain-coordinode/langchain_coordinode
  • llama-index-coordinode/llama_index/graph_stores/coordinode

langchain-coordinode/langchain_coordinode/graph.py

Lines changed: 66 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from __future__ import annotations
44

55
import hashlib
6+
import json
67
import re
78
from typing import Any
89

@@ -111,63 +112,70 @@ def add_graph_documents(
111112
``MENTIONS`` edges (also unconditional ``CREATE``).
112113
"""
113114
for doc in graph_documents:
114-
# ── Upsert nodes ──────────────────────────────────────────────
115115
for node in doc.nodes:
116-
label = _cypher_ident(node.type or "Entity")
117-
props = dict(node.properties or {})
118-
# Always enforce node.id as the merge key; incoming
119-
# properties["name"] must not drift from the MERGE predicate.
120-
props["name"] = node.id
121-
self._client.cypher(
122-
f"MERGE (n:{label} {{name: $name}}) SET n += $props",
123-
params={"name": node.id, "props": props},
124-
)
125-
126-
# ── Create relationships ──────────────────────────────────────
116+
self._upsert_node(node)
127117
for rel in doc.relationships:
128-
src_label = _cypher_ident(rel.source.type or "Entity")
129-
dst_label = _cypher_ident(rel.target.type or "Entity")
130-
rel_type = _cypher_ident(rel.type)
131-
props = dict(rel.properties or {})
132-
# CoordiNode does not support MERGE for edges or WHERE NOT
133-
# (pattern) guards — use unconditional CREATE. SET r += $props
134-
# is skipped when props is empty because SET r += {} is not
135-
# supported by all server versions.
136-
if props:
137-
self._client.cypher(
138-
f"MATCH (src:{src_label} {{name: $src}}) "
139-
f"MATCH (dst:{dst_label} {{name: $dst}}) "
140-
f"CREATE (src)-[r:{rel_type}]->(dst) SET r += $props",
141-
params={"src": rel.source.id, "dst": rel.target.id, "props": props},
142-
)
143-
else:
144-
self._client.cypher(
145-
f"MATCH (src:{src_label} {{name: $src}}) "
146-
f"MATCH (dst:{dst_label} {{name: $dst}}) "
147-
f"CREATE (src)-[r:{rel_type}]->(dst)",
148-
params={"src": rel.source.id, "dst": rel.target.id},
149-
)
150-
151-
# ── Optionally link source document ───────────────────────────
118+
self._create_edge(rel)
152119
if include_source and doc.source:
153-
src_id = getattr(doc.source, "id", None) or _stable_document_id(doc.source)
154-
self._client.cypher(
155-
"MERGE (d:__Document__ {id: $id}) SET d.page_content = $text",
156-
params={"id": src_id, "text": doc.source.page_content or ""},
157-
)
158-
for node in doc.nodes:
159-
label = _cypher_ident(node.type or "Entity")
160-
self._client.cypher(
161-
f"MATCH (d:__Document__ {{id: $doc_id}}) "
162-
f"MATCH (n:{label} {{name: $name}}) "
163-
f"CREATE (d)-[:MENTIONS]->(n)",
164-
params={"doc_id": src_id, "name": node.id},
165-
)
120+
self._link_document_to_entities(doc)
166121

167122
# Invalidate cached schema so next access reflects new data
168123
self._schema = None
169124
self._structured_schema = None
170125

126+
def _upsert_node(self, node: Any) -> None:
    """MERGE a single entity node, keyed on its ``name`` property.

    The label is ``node.type`` (``"Entity"`` when that is falsy), passed
    through ``_cypher_ident`` before being interpolated into the query.
    ``node.id`` is bound as ``$name`` and is the only MERGE predicate; the
    remaining properties are applied with ``SET n += $props``.
    """
    node_label = _cypher_ident(node.type or "Entity")
    # Work on a copy so the caller's mapping is untouched, and force "name"
    # to node.id so the stored value can never disagree with the MERGE key.
    merged_props = dict(node.properties or {}, name=node.id)
    statement = f"MERGE (n:{node_label} {{name: $name}}) SET n += $props"
    self._client.cypher(
        statement,
        params={"name": node.id, "props": merged_props},
    )
137+
138+
def _create_edge(self, rel: Any) -> None:
    """CREATE one directed relationship between two already-stored nodes.

    Source and target are matched by label (``"Entity"`` when the node type
    is falsy) and by ``name`` equal to the node's ``id``. The edge is always
    written with an unconditional ``CREATE``: CoordiNode does not support
    MERGE for edge patterns, so re-ingesting the same relationship produces
    a duplicate edge.
    """
    source_label = _cypher_ident(rel.source.type or "Entity")
    target_label = _cypher_ident(rel.target.type or "Entity")
    edge_type = _cypher_ident(rel.type)
    edge_props = dict(rel.properties or {})

    statement = (
        f"MATCH (src:{source_label} {{name: $src}}) "
        f"MATCH (dst:{target_label} {{name: $dst}}) "
        f"CREATE (src)-[r:{edge_type}]->(dst)"
    )
    bindings = {"src": rel.source.id, "dst": rel.target.id}
    if edge_props:
        # Only attach the SET clause when there is something to set:
        # ``SET r += {}`` is not supported by all server versions.
        statement += " SET r += $props"
        bindings["props"] = edge_props
    self._client.cypher(statement, params=bindings)
164+
165+
def _link_document_to_entities(self, doc: Any) -> None:
    """MERGE the source ``__Document__`` node and link it to every entity.

    The document id is ``doc.source.id`` when present and truthy, otherwise
    the value returned by ``_stable_document_id``. The document's
    ``page_content`` (empty string when falsy) is stored on the node, and
    one ``MENTIONS`` edge per entry in ``doc.nodes`` is written with an
    unconditional ``CREATE``.
    """
    source = doc.source
    document_id = getattr(source, "id", None) or _stable_document_id(source)
    self._client.cypher(
        "MERGE (d:__Document__ {id: $id}) SET d.page_content = $text",
        params={"id": document_id, "text": source.page_content or ""},
    )
    for entity in doc.nodes:
        entity_label = _cypher_ident(entity.type or "Entity")
        mention_query = (
            "MATCH (d:__Document__ {id: $doc_id}) "
            f"MATCH (n:{entity_label} {{name: $name}}) "
            "CREATE (d)-[:MENTIONS]->(n)"
        )
        self._client.cypher(
            mention_query,
            params={"doc_id": document_id, "name": entity.id},
        )
178+
171179
def query(
172180
self,
173181
query: str,
@@ -213,8 +221,15 @@ def _stable_document_id(source: Any) -> str:
213221
"""
214222
content = getattr(source, "page_content", "") or ""
215223
metadata = getattr(source, "metadata", {}) or {}
216-
stable = content + "|" + "|".join(f"{k}={v}" for k, v in sorted(metadata.items()))
217-
return hashlib.sha256(stable.encode()).hexdigest()[:32]
224+
# Use canonical JSON encoding to avoid delimiter ambiguity and ensure
225+
# determinism for nested/non-scalar metadata values.
226+
canonical = json.dumps(
227+
{"content": content, "metadata": metadata},
228+
sort_keys=True,
229+
separators=(",", ":"),
230+
ensure_ascii=False,
231+
)
232+
return hashlib.sha256(canonical.encode()).hexdigest()[:32]
218233

219234

220235
def _cypher_ident(name: str) -> str:

llama-index-coordinode/llama_index/graph_stores/coordinode/base.py

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,13 @@ def get_triplets(
117117
properties: dict[str, Any] | None = None,
118118
ids: list[str] | None = None,
119119
) -> list[list[LabelledNode]]:
120-
"""Retrieve triplets (subject, predicate, object) as node triples."""
120+
"""Retrieve triplets (subject, predicate, object) as node triples.
121+
122+
Note:
123+
``relation_names`` is **required**. CoordiNode does not support
124+
untyped wildcard ``[r]`` relationship patterns — they silently return
125+
no rows. Omitting ``relation_names`` raises ``NotImplementedError``.
126+
"""
121127
conditions: list[str] = []
122128
params: dict[str, Any] = {}
123129

@@ -377,17 +383,21 @@ def _parse_edge_types_from_schema(schema_text: str) -> list[str]:
377383
Parses the "Edge types:" section produced by ``get_schema_text()``.
378384
"""
379385
edge_types: list[str] = []
380-
in_edges = False
381-
for line in schema_text.splitlines():
386+
lines = iter(schema_text.splitlines())
387+
388+
# Advance to the "Edge types:" header.
389+
for line in lines:
390+
if line.strip().lower().startswith("edge types"):
391+
break
392+
393+
# Collect bullet items until the first blank line.
394+
for line in lines:
382395
stripped = line.strip()
383-
if stripped.lower().startswith("edge types"):
384-
in_edges = True
385-
continue
386-
if in_edges:
387-
if not stripped:
388-
break
389-
if stripped.startswith("-") or stripped.startswith("*"):
390-
name = stripped.lstrip("-* ").split("(")[0].strip()
391-
if name:
392-
edge_types.append(name)
396+
if not stripped:
397+
break
398+
if stripped.startswith(("-", "*")):
399+
name = stripped.lstrip("-* ").split("(")[0].strip()
400+
if name:
401+
edge_types.append(name)
402+
393403
return edge_types

0 commit comments

Comments
 (0)