Skip to content

Commit 7c4d4c0

Browse files
committed
feat(langchain): add similarity_search() to CoordinodeGraph
Wraps CoordinodeClient.vector_search() with label/property defaults, returning [{id, node, distance}, ...] sorted by ascending distance. Guards against empty query_vector to match server validation behaviour. Adds two integration tests: one seeding a :LCSim node and verifying the seeded node appears in top-k results, one verifying empty-vector returns []. Closes #20
1 parent 3442f3f commit 7c4d4c0

2 files changed

Lines changed: 71 additions & 0 deletions

File tree

langchain-coordinode/langchain_coordinode/graph.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,39 @@ def query(
194194
# cypher() returns List[Dict[str, Any]] directly — column name → value.
195195
return self._client.cypher(query, params=params or {})
196196

197+
def similarity_search(
    self,
    query_vector: list[float],
    k: int = 10,
    label: str = "Chunk",
    property: str = "embedding",
) -> list[dict[str, Any]]:
    """Find nodes whose ``property`` vector is closest to ``query_vector``.

    Wraps ``CoordinodeClient.vector_search()``. Each entry of the returned
    list is a dict with the keys ``id`` (internal integer node ID), ``node``
    (node properties), and ``distance`` (cosine distance, lower = more
    similar).

    Args:
        query_vector: Embedding vector to search for. An empty vector
            short-circuits to ``[]`` without contacting the server.
        k: Maximum number of results to return (forwarded as ``top_k``).
        label: Node label to search (default ``"Chunk"``).
        property: Embedding property name (default ``"embedding"``). The
            name shadows the builtin, but it is part of the public keyword
            interface and is therefore kept as-is.

    Returns:
        List of result dicts sorted by ascending distance.
    """
    # Nothing can be matched against an empty vector; skip the round trip.
    if not query_vector:
        return []

    hits = self._client.vector_search(
        label=label,
        property=property,
        vector=query_vector,
        top_k=k,
    )
    rows = [
        {"id": hit.node.id, "node": hit.node.properties, "distance": hit.distance}
        for hit in hits
    ]
    # Enforce the documented ordering here instead of relying on the order
    # the client happens to return. list.sort() is stable, so results that
    # already arrive sorted are left exactly as they were.
    rows.sort(key=lambda row: row["distance"])
    return rows
229+
197230
# ── Lifecycle ─────────────────────────────────────────────────────────
198231

199232
def close(self) -> None:

tests/integration/adapters/test_langchain.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,44 @@ def test_add_graph_documents_idempotent(graph, unique_tag):
133133
assert result[0]["cnt"] == 1
134134

135135

136+
# ── similarity_search ─────────────────────────────────────────────────────────
137+
138+
139+
def test_similarity_search_returns_results(graph, unique_tag):
    """similarity_search() yields dicts carrying ``id``, ``node`` and ``distance``.

    A :LCSim node with a known embedding is created, the same vector is used
    as the query, and the seeded node is expected among the top-k results.
    The node is removed again in ``finally`` so the shared DB stays clean.
    """
    # The embedding is derived from the per-test tag so it does not collide
    # with other :LCSim nodes living in the shared database.
    tag_bytes = bytes.fromhex(unique_tag)
    vec = [float(tag_bytes[i % len(tag_bytes)]) / 255.0 for i in range(16)]
    node_key = f"lcsim-{unique_tag}"

    try:
        created = graph.query(
            "CREATE (n:LCSim {id: $id, embedding: $vec}) RETURN n AS nid",
            params={"id": node_key, "vec": vec},
        )
        expected_id = created[0]["nid"]

        results = graph.similarity_search(vec, k=5, label="LCSim", property="embedding")

        assert isinstance(results, list)
        assert len(results) >= 1
        for row in results:
            assert "id" in row and "node" in row and "distance" in row
        found_ids = [row["id"] for row in results]
        assert expected_id in found_ids
        assert results[0]["distance"] >= 0.0
    finally:
        graph.query("MATCH (n:LCSim {id: $id}) DELETE n", params={"id": node_key})
166+
167+
168+
def test_similarity_search_empty_vector_returns_empty(graph):
    """similarity_search() with an empty vector list returns an empty list without error."""
    results = graph.similarity_search([], k=5)
    assert isinstance(results, list)
    # The type check alone would also pass for a non-empty list; pin the
    # emptiness that the test name and docstring promise.
    assert results == []
172+
173+
136174
def test_schema_refreshes_after_add(graph, unique_tag):
137175
"""structured_schema is invalidated and re-fetched after add_graph_documents."""
138176
graph._schema = None # force refresh

0 commit comments

Comments
 (0)