Skip to content

Commit 13487c9

Browse files
committed
fix(langchain): enforce node.id as merge key; stable document IDs
- props["name"] = node.id overwrites any incoming properties["name"], so the MERGE predicate and the stored name always match; this prevents duplicate nodes and broken relationship MATCH clauses on re-ingest.
- Add _stable_document_id(): a SHA-256 hash of page_content and metadata makes include_source=True idempotent across Python processes.

test(adapters): tighten integration assertions for idempotency and rels
- test_add_graph_documents_creates_relationship: assert that the LC_RESEARCHES edge exists, not just that the source node is present.
- test_add_graph_documents_idempotent: assert cnt == 1 to catch duplicates.
- test_upsert_nodes_idempotent: assert len == 1 to catch duplicates.
- test_get_rel_map: assert len >= 1 to verify a non-empty result.
1 parent b1fd70d commit 13487c9

3 files changed

Lines changed: 26 additions & 8 deletions

File tree

langchain-coordinode/langchain_coordinode/graph.py

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import hashlib
56
import re
67
from typing import Any
78

@@ -104,7 +105,9 @@ def add_graph_documents(
104105
for node in doc.nodes:
105106
label = _cypher_ident(node.type or "Entity")
106107
props = dict(node.properties or {})
107-
props.setdefault("name", node.id)
108+
# Always enforce node.id as the merge key; incoming
109+
# properties["name"] must not drift from the MERGE predicate.
110+
props["name"] = node.id
108111
self._client.cypher(
109112
f"MERGE (n:{label} {{name: $name}}) SET n += $props",
110113
params={"name": node.id, "props": props},
@@ -138,7 +141,7 @@ def add_graph_documents(
138141

139142
# ── Optionally link source document ───────────────────────────
140143
if include_source and doc.source:
141-
src_id = getattr(doc.source, "id", None) or str(id(doc.source))
144+
src_id = getattr(doc.source, "id", None) or _stable_document_id(doc.source)
142145
self._client.cypher(
143146
"MERGE (d:__Document__ {id: $id}) SET d.page_content = $text",
144147
params={"id": src_id, "text": doc.source.page_content or ""},
@@ -198,6 +201,19 @@ def __exit__(self, *args: Any) -> None:
198201
# ── Schema parser ─────────────────────────────────────────────────────────
199202

200203

204+
def _stable_document_id(source: Any) -> str:
    """Return a deterministic ID for a LangChain-style document.

    The ID is the first 32 hex characters of the SHA-256 digest of a
    canonical JSON encoding of ``page_content`` and ``metadata``.  Hashing a
    structured encoding (instead of joining fields with ``|`` and ``=``)
    keeps the content/metadata boundary unambiguous, so a document whose
    text happens to contain ``|key=value`` cannot collide with a different
    document that carries that pair as metadata.

    Args:
        source: Any object; ``page_content`` and ``metadata`` are read with
            ``getattr`` and treated as empty when missing or falsy.

    Returns:
        A 32-character lowercase hexadecimal string.
    """
    import json  # local import: keeps this block self-contained

    def _jsonable(value: Any) -> Any:
        # Sets have no stable iteration order across processes (string hash
        # randomization), so serialize them as a list sorted by repr.
        if isinstance(value, (set, frozenset)):
            return sorted(value, key=repr)
        # Fallback for values JSON cannot encode natively (dates, paths, ...).
        return str(value)

    content = getattr(source, "page_content", "") or ""
    metadata = getattr(source, "metadata", {}) or {}
    # Stringify keys up front so ``sort_keys`` never has to compare keys of
    # different types (which raises TypeError).
    normalized = {str(key): value for key, value in metadata.items()}
    canonical = json.dumps(
        [content, normalized],
        sort_keys=True,
        separators=(",", ":"),
        default=_jsonable,
    )
    # The default ensure_ascii=True output is pure ASCII, so encoding cannot fail.
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()[:32]
215+
216+
201217
def _cypher_ident(name: str) -> str:
202218
"""Escape a label/type name for use as a Cypher identifier."""
203219
# If already safe (alphanumeric + underscore, not starting with digit) keep as-is

tests/integration/adapters/test_langchain.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,12 +93,13 @@ def test_add_graph_documents_creates_relationship(graph, unique_tag):
9393

9494
graph.add_graph_documents([doc])
9595

96-
# Verify both nodes exist
96+
# Verify the relationship was created, not just the source node.
9797
result = graph.query(
98-
"MATCH (n:LCPerson2 {name: $name}) RETURN n.name AS name",
99-
params={"name": f"Charlie-{unique_tag}"},
98+
"MATCH (a:LCPerson2 {name: $src})-[r:LC_RESEARCHES]->(b:LCConcept {name: $dst}) "
99+
"RETURN count(r) AS cnt",
100+
params={"src": f"Charlie-{unique_tag}", "dst": f"GraphRAG-{unique_tag}"},
100101
)
101-
assert len(result) >= 1, f"source node not found: {result}"
102+
assert result[0]["cnt"] >= 1, f"relationship not found: {result}"
102103

103104

104105
def test_add_graph_documents_idempotent(graph, unique_tag):
@@ -113,7 +114,7 @@ def test_add_graph_documents_idempotent(graph, unique_tag):
113114
"MATCH (n:LCIdempotent {name: $name}) RETURN count(n) AS cnt",
114115
params={"name": f"Idempotent-{unique_tag}"},
115116
)
116-
assert result[0]["cnt"] >= 1
117+
assert result[0]["cnt"] == 1
117118

118119

119120
def test_schema_refreshes_after_add(graph, unique_tag):

tests/integration/adapters/test_llama_index.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def test_upsert_nodes_idempotent(store, tag):
6666
store.upsert_nodes([node]) # second call must not raise
6767

6868
found = store.get(properties={"name": f"Idem-{tag}"})
69-
assert len(found) >= 1
69+
assert len(found) == 1
7070

7171

7272
def test_get_by_id(store, tag):
@@ -116,6 +116,7 @@ def test_get_rel_map(store, tag):
116116

117117
result = store.get_rel_map([src], depth=1, limit=10)
118118
assert isinstance(result, list)
119+
assert len(result) >= 1
119120

120121

121122
# ── Delete ────────────────────────────────────────────────────────────────────

0 commit comments

Comments
 (0)