Skip to content

Commit 50fb1f1

Browse files
committed
fix(langchain): implement add_graph_documents and use __label__/__type__
- Implement add_graph_documents() which was inherited as a no-op from GraphStore base class; upserts nodes via MERGE and creates edges via MATCH+CREATE with fallback when WHERE NOT guard is unsupported - Fix refresh_schema() relationship enrichment query: labels()/type() Cypher functions return null in CoordiNode; use a.__label__ and r.__type__ internal properties instead - Add _cypher_ident() helper to safely escape label/type names Closes #14
1 parent ce93640 commit 50fb1f1

1 file changed

Lines changed: 91 additions & 1 deletion

File tree

  • langchain-coordinode/langchain_coordinode

langchain-coordinode/langchain_coordinode/graph.py

Lines changed: 91 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ def refresh_schema(self) -> None:
7272
# Cypher — get_schema_text() only lists edge types without direction.
7373
try:
7474
rows = self._client.cypher(
75-
"MATCH (a)-[r]->(b) RETURN DISTINCT labels(a)[0] AS src, type(r) AS rel, labels(b)[0] AS dst"
75+
"MATCH (a)-[r]->(b) RETURN DISTINCT a.__label__ AS src, r.__type__ AS rel, b.__label__ AS dst"
7676
)
7777
structured["relationships"] = [
7878
{"start": row["src"], "type": row["rel"], "end": row["dst"]}
@@ -83,6 +83,88 @@ def refresh_schema(self) -> None:
8383
pass # Graph may have no relationships yet; structured["relationships"] stays []
8484
self._structured_schema = structured
8585

86+
def add_graph_documents(
87+
self,
88+
graph_documents: list[Any],
89+
include_source: bool = False,
90+
) -> None:
91+
"""Store nodes and relationships extracted from ``GraphDocument`` objects.
92+
93+
Nodes are upserted by ``id`` (used as the ``name`` property).
94+
Relationships are created between existing nodes; if a relationship
95+
between the same source and target already exists it is skipped.
96+
97+
Args:
98+
graph_documents: List of ``langchain_community.graphs.graph_document.GraphDocument``.
99+
include_source: If ``True``, also store the source ``Document`` as a
100+
``__Document__`` node linked to every extracted entity.
101+
"""
102+
for doc in graph_documents:
103+
# ── Upsert nodes ──────────────────────────────────────────────
104+
for node in doc.nodes:
105+
label = _cypher_ident(node.type or "Entity")
106+
props = dict(node.properties or {})
107+
props.setdefault("name", node.id)
108+
self._client.cypher(
109+
f"MERGE (n:{label} {{name: $name}}) SET n += $props",
110+
params={"name": node.id, "props": props},
111+
)
112+
113+
# ── Create relationships (idempotent: skip if already exists) ─
114+
for rel in doc.relationships:
115+
src_label = _cypher_ident(rel.source.type or "Entity")
116+
dst_label = _cypher_ident(rel.target.type or "Entity")
117+
rel_type = _cypher_ident(rel.type)
118+
props = dict(rel.properties or {})
119+
# CoordiNode does not yet support MERGE for edges; use CREATE
120+
# guarded by NOT EXISTS to avoid duplicates on repeated calls.
121+
try:
122+
self._client.cypher(
123+
f"MATCH (src:{src_label} {{name: $src}}) "
124+
f"MATCH (dst:{dst_label} {{name: $dst}}) "
125+
f"WHERE NOT (src)-[:{rel_type}]->(dst) "
126+
f"CREATE (src)-[r:{rel_type}]->(dst) SET r += $props",
127+
params={"src": rel.source.id, "dst": rel.target.id, "props": props},
128+
)
129+
except Exception: # noqa: BLE001
130+
# WHERE NOT EXISTS guard may not be supported on all server
131+
# versions; fall back to unconditional CREATE
132+
self._client.cypher(
133+
f"MATCH (src:{src_label} {{name: $src}}) "
134+
f"MATCH (dst:{dst_label} {{name: $dst}}) "
135+
f"CREATE (src)-[r:{rel_type}]->(dst) SET r += $props",
136+
params={"src": rel.source.id, "dst": rel.target.id, "props": props},
137+
)
138+
139+
# ── Optionally link source document ───────────────────────────
140+
if include_source and doc.source:
141+
src_id = getattr(doc.source, "id", None) or str(id(doc.source))
142+
self._client.cypher(
143+
"MERGE (d:__Document__ {id: $id}) SET d.page_content = $text",
144+
params={"id": src_id, "text": doc.source.page_content or ""},
145+
)
146+
for node in doc.nodes:
147+
label = _cypher_ident(node.type or "Entity")
148+
try:
149+
self._client.cypher(
150+
f"MATCH (d:__Document__ {{id: $doc_id}}) "
151+
f"MATCH (n:{label} {{name: $name}}) "
152+
f"WHERE NOT (d)-[:MENTIONS]->(n) "
153+
f"CREATE (d)-[:MENTIONS]->(n)",
154+
params={"doc_id": src_id, "name": node.id},
155+
)
156+
except Exception: # noqa: BLE001
157+
self._client.cypher(
158+
f"MATCH (d:__Document__ {{id: $doc_id}}) "
159+
f"MATCH (n:{label} {{name: $name}}) "
160+
f"CREATE (d)-[:MENTIONS]->(n)",
161+
params={"doc_id": src_id, "name": node.id},
162+
)
163+
164+
# Invalidate cached schema so next access reflects new data
165+
self._schema = None
166+
self._structured_schema = None
167+
86168
def query(
87169
self,
88170
query: str,
@@ -116,6 +198,14 @@ def __exit__(self, *args: Any) -> None:
116198
# ── Schema parser ─────────────────────────────────────────────────────────
117199

118200

201+
def _cypher_ident(name: str) -> str:
202+
"""Escape a label/type name for use as a Cypher identifier."""
203+
# If already safe (alphanumeric + underscore, not starting with digit) keep as-is
204+
if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name):
205+
return name
206+
return f"`{name.replace('`', '``')}`"
207+
208+
119209
def _parse_schema(schema_text: str) -> dict[str, Any]:
120210
"""Convert CoordiNode schema text into LangChain's structured format.
121211

0 commit comments

Comments
 (0)