Skip to content

Commit 778e8c3

Browse files
committed
fix(langchain): use min() in _first_label for deterministic label selection
openCypher does not guarantee a stable ordering for labels(), so using labels[0] produced nondeterministic schema entries across refresh_schema() calls. Replace with min(labels) to always select the lexicographically smallest label consistently. Also strengthen the vector_query() integration test: capture the seeded node's internal CoordiNode ID from CREATE RETURN and assert it appears in the returned ChunkNode list, proving the specific seeded node was found rather than any pre-existing Chunk.
1 parent 69fa991 commit 778e8c3

2 files changed

Lines changed: 15 additions & 4 deletions

File tree

langchain-coordinode/langchain_coordinode/graph.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -228,9 +228,14 @@ def _stable_document_id(source: Any) -> str:
228228

229229

230230
def _first_label(labels: Any) -> str | None:
231-
"""Extract the first label from a labels() result (list of strings)."""
231+
"""Extract a stable label from a labels() result (list of strings).
232+
233+
openCypher does not guarantee a stable ordering for labels(), so using
234+
labels[0] would produce nondeterministic schema entries across calls.
235+
We return the lexicographically smallest label as a deterministic rule.
236+
"""
232237
if isinstance(labels, list) and labels:
233-
return str(labels[0])
238+
return str(min(labels))
234239
if isinstance(labels, str):
235240
return labels
236241
return None

tests/integration/adapters/test_llama_index.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -165,17 +165,23 @@ def test_vector_query_returns_results(store, tag):
165165
vec = [float(i) / 16 for i in range(16)]
166166
# Seed a Chunk node with an embedding directly via Cypher.
167167
# vector_query() defaults label to "Chunk" when no MetadataFilters are provided.
168-
store._client.cypher(
169-
"CREATE (n:Chunk {id: $id, text: $text, embedding: $vec})",
168+
# Capture the internal CoordiNode node ID (returned as integer by RETURN n) so we
169+
# can assert the specific seeded node is retrieved — not just any pre-existing Chunk.
170+
seed_rows = store._client.cypher(
171+
"CREATE (n:Chunk {id: $id, text: $text, embedding: $vec}) RETURN n AS nid",
170172
params={"id": f"vec-{tag}", "text": "test chunk", "vec": vec},
171173
)
174+
seeded_internal_id = str(seed_rows[0]["nid"])
172175
try:
173176
query = VectorStoreQuery(query_embedding=vec, similarity_top_k=1)
174177
nodes, scores = store.vector_query(query)
175178

176179
assert isinstance(nodes, list)
177180
assert isinstance(scores, list)
178181
assert len(nodes) >= 1
182+
# vector_search returns CoordiNode internal node IDs (ChunkNode.id_);
183+
# verify our seeded node is the one found.
184+
assert any(str(getattr(node, "id_", "")) == seeded_internal_id for node in nodes)
179185
assert len(scores) == len(nodes)
180186
assert scores[0] >= 0.0
181187
finally:

0 commit comments

Comments
 (0)