Skip to content

Commit 27434ee

Browse files
committed
feat(sdk,tests): add create_text_index/drop_text_index, fix FTS test assumptions
FTS indexing is NOT automatic — a CREATE TEXT INDEX DDL statement is required before text_search() can return results. The previous code and tests incorrectly assumed automatic indexing.

SDK changes:
- Add TextIndexInfo result class (name, label, properties, default_language, documents_indexed)
- Add AsyncCoordinodeClient.create_text_index() and drop_text_index()
- Add CoordinodeClient sync wrappers for both methods
- Export TextIndexInfo from the coordinode package
- Fix text_search() docstring: document the DDL requirement, remove the false claim about automatic indexing

Test changes:
- Fix test_text_search_returns_results: create/drop a TEXT INDEX around the search call; drop the empty-results xfail (now a hard assertion)
- Fix test_text_search_fuzzy: create/drop a TEXT INDEX; assert results are non-empty (was: just verify no exception)
- Fix test_hybrid_text_vector_search_returns_results: add a TEXT INDEX
- Update header comment: remove the 'automatic indexing' statement
1 parent e06be1f commit 27434ee

3 files changed

Lines changed: 118 additions & 15 deletions

File tree

coordinode/coordinode/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
LabelInfo,
2828
NodeResult,
2929
PropertyDefinitionInfo,
30+
TextIndexInfo,
3031
TextResult,
3132
TraverseResult,
3233
VectorResult,
@@ -47,5 +48,6 @@
4748
"LabelInfo",
4849
"EdgeTypeInfo",
4950
"PropertyDefinitionInfo",
51+
"TextIndexInfo",
5052
"TraverseResult",
5153
]

coordinode/coordinode/client.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,23 @@ def __repr__(self) -> str:
161161
return f"TraverseResult(nodes={len(self.nodes)}, edges={len(self.edges)})"
162162

163163

164+
class TextIndexInfo:
    """Information about a full-text index returned by :meth:`create_text_index`.

    The object is built from a single result row (a mapping). Every field is
    optional in the row: a key that is absent *or* whose value is ``None``
    falls back to ``""`` for text fields and ``0`` for the document count, so
    a partially populated row never yields the literal string ``"None"`` and
    never raises on ``int(None)``.

    Attributes are read from these row keys:
        name: ``"index"``
        label: ``"label"``
        properties: ``"properties"`` (stringified as-is)
        default_language: ``"default_language"``
        documents_indexed: ``"documents_indexed"`` (converted with ``int``)
    """

    def __init__(self, row: dict[str, Any]) -> None:
        def _text(key: str) -> str:
            # Treat an explicit null the same as a missing key.
            value = row.get(key)
            return "" if value is None else str(value)

        self.name: str = _text("index")
        self.label: str = _text("label")
        self.properties: str = _text("properties")
        self.default_language: str = _text("default_language")

        count = row.get("documents_indexed")
        self.documents_indexed: int = 0 if count is None else int(count)

    def __repr__(self) -> str:
        return (
            f"TextIndexInfo(name={self.name!r}, label={self.label!r},"
            f" properties={self.properties!r}, documents_indexed={self.documents_indexed})"
        )
179+
180+
164181
# ── Async client ─────────────────────────────────────────────────────────────
165182

166183

@@ -524,6 +541,57 @@ async def create_edge_type(
524541
et = await self._schema_stub.CreateEdgeType(req, timeout=self._timeout)
525542
return EdgeTypeInfo(et)
526543

544+
async def create_text_index(
    self,
    name: str,
    label: str,
    properties: str | list[str],
    *,
    language: str = "",
) -> TextIndexInfo:
    """Create a full-text (BM25) index on one or more node properties.

    The DDL statement is assembled by string interpolation, so every
    identifier (index name, label, property names, language) is validated
    before the query is built. Values that are not plain identifiers are
    rejected locally instead of being spliced into the Cypher text.

    Args:
        name: Unique index name (e.g. ``"article_body"``).
        label: Node label to index (e.g. ``"Article"``).
        properties: Property name or list of property names to index
            (e.g. ``"body"`` or ``["title", "body"]``).
        language: Default stemming/tokenization language (e.g. ``"english"``,
            ``"russian"``). Empty string uses the server default
            (``"english"``).

    Returns:
        :class:`TextIndexInfo` with index metadata and document count. When
        the server returns no rows, the info is synthesised from the
        arguments (document count ``0``).

    Raises:
        ValueError: If *properties* is empty, or if any of *name*, *label*,
            a property name, or a non-empty *language* is not a string
            matching ``[A-Za-z_][A-Za-z0-9_]*``.

    Example::

        info = await client.create_text_index("article_body", "Article", "body")
        # then: results = await client.text_search("Article", "machine learning")
    """
    import re  # local import keeps this method self-contained

    prop_list = [properties] if isinstance(properties, str) else list(properties)
    if not prop_list:
        # An empty list would otherwise render as "ON :Label()".
        raise ValueError("properties must name at least one property")

    identifier = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
    to_check = [("index name", name), ("label", label)]
    to_check.extend(("property name", prop) for prop in prop_list)
    if language:
        to_check.append(("language", language))
    for kind, value in to_check:
        if not isinstance(value, str) or identifier.fullmatch(value) is None:
            raise ValueError(f"invalid {kind} for CREATE TEXT INDEX: {value!r}")

    props_expr = ", ".join(prop_list)
    lang_clause = f" DEFAULT LANGUAGE {language}" if language else ""
    cypher = f"CREATE TEXT INDEX {name} ON :{label}({props_expr}){lang_clause}"
    rows = await self.cypher(cypher)
    if rows:
        return TextIndexInfo(rows[0])
    # No row returned: describe the index from the request itself.
    return TextIndexInfo({"index": name, "label": label, "properties": props_expr})
582+
583+
async def drop_text_index(self, name: str) -> None:
    """Drop a full-text index by name.

    The statement is built by interpolating *name* into the Cypher text, so
    the name is validated first; anything that is not a plain identifier is
    rejected before a query is sent.

    Args:
        name: Index name previously passed to :meth:`create_text_index`.

    Raises:
        ValueError: If *name* is not a string matching
            ``[A-Za-z_][A-Za-z0-9_]*``.

    Example::

        await client.drop_text_index("article_body")
    """
    import re  # local import keeps this method self-contained

    if not isinstance(name, str) or re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name) is None:
        raise ValueError(f"invalid index name for DROP TEXT INDEX: {name!r}")
    await self.cypher(f"DROP TEXT INDEX {name}")
594+
527595
async def traverse(
528596
self,
529597
start_node_id: int,
@@ -604,6 +672,16 @@ async def text_search(
604672
605673
Returns:
606674
List of :class:`TextResult` ordered by BM25 score descending.
675+
Returns ``[]`` if no text index exists for *label*.
676+
677+
Note:
678+
Text indexing is **not** automatic. Before calling this method,
679+
create a full-text index with the Cypher DDL statement::
680+
681+
CREATE TEXT INDEX my_index ON :Label(property)
682+
683+
or via :meth:`create_text_index`. Nodes written before the index
684+
was created are indexed immediately at DDL execution time.
607685
"""
608686
from coordinode._proto.coordinode.v1.query.text_pb2 import TextSearchRequest # type: ignore[import]
609687

@@ -806,6 +884,21 @@ def create_edge_type(
806884
"""Create an edge type in the schema registry."""
807885
return self._run(self._async.create_edge_type(name, properties))
808886

887+
def create_text_index(
    self,
    name: str,
    label: str,
    properties: str | list[str],
    *,
    language: str = "",
) -> TextIndexInfo:
    """Blocking counterpart of the async client's ``create_text_index``.

    Builds the call on the wrapped async client (``self._async``) with the
    same arguments and hands it to ``self._run``, returning whatever
    ``self._run`` returns.
    """
    pending = self._async.create_text_index(name, label, properties, language=language)
    return self._run(pending)
897+
898+
def drop_text_index(self, name: str) -> None:
    """Blocking counterpart of the async client's ``drop_text_index``.

    Forwards *name* to the wrapped async client (``self._async``) and passes
    the resulting call to ``self._run``, returning its result.
    """
    pending = self._async.drop_text_index(name)
    return self._run(pending)
901+
809902
def traverse(
810903
self,
811904
start_node_id: int,

tests/integration/test_sdk.py

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
EdgeTypeInfo,
2121
HybridResult,
2222
LabelInfo,
23+
TextIndexInfo,
2324
TextResult,
2425
TraverseResult,
2526
)
@@ -566,16 +567,13 @@ def test_vector_search_returns_results(client):
566567

567568

568569
# FTS tests require a CoordiNode server with TextService implemented (>=0.3.8).
569-
# They are marked xfail so the suite stays green against older servers; once
570-
# upgraded, the tests turn into expected passes automatically.
570+
# They are wrapped with @_fts so the suite stays green against older servers
571+
# (UNIMPLEMENTED gRPC status → xfail); against >=0.3.8 servers they are real passes.
571572
#
572-
# Note: create_label() is intentionally NOT called before text_search().
573-
# FTS indexing in CoordiNode is automatic for all nodes whose label was written
574-
# via CREATE/MERGE — no explicit label registration is required. On schema-strict
575-
# servers a caller may choose to pre-register a label, but the SDK's text_search()
576-
# and hybrid_text_vector_search() work on schema-free graphs too. These tests
577-
# exercise the common schema-free path; calling create_label() here would test a
578-
# different (schema-strict) code path and is covered by test_create_label_*.
573+
# FTS indexing is NOT automatic. Each test that expects non-empty results must
574+
# first create a text index with CREATE TEXT INDEX (or client.create_text_index())
575+
# and drop it in the finally block. Tests that deliberately cover the "no-index"
576+
# case (test_text_search_empty_for_unindexed_label) must NOT create an index.
579577
def _fts(fn):
580578
"""Wrap an FTS test to handle servers without TextService.
581579
@@ -606,18 +604,22 @@ def test_text_search_returns_results(client):
606604
"""text_search() finds nodes whose text property matches the query."""
607605
label = f"FtsTest_{uid()}"
608606
tag = uid()
607+
idx_name = f"idx_{label.lower()}"
609608
# CoordiNode executor serialises a node variable as Value::Int(node_id) — runner.rs NodeScan
610609
# path. No id() function needed; rows[0]["node_id"] is the integer internal node id.
611610
rows = client.cypher(
612611
f"CREATE (n:{label} {{tag: $tag, body: 'machine learning and neural networks'}}) RETURN n AS node_id",
613612
params={"tag": tag},
614613
)
615614
seed_id = rows[0]["node_id"]
615+
# Text index must be created explicitly; nodes written before index creation
616+
# are indexed immediately at DDL time.
617+
idx_info = client.create_text_index(idx_name, label, "body")
618+
assert isinstance(idx_info, TextIndexInfo)
616619
try:
617620
results = client.text_search(label, "machine learning", limit=5)
618621
assert isinstance(results, list)
619-
if not results:
620-
pytest.xfail("text_search returned no results — FTS index not available on this server")
622+
assert results, "text_search returned no results after index creation"
621623
assert any(r.node_id == seed_id for r in results), (
622624
f"seeded node {seed_id} not found in text_search results: {results}"
623625
)
@@ -628,6 +630,7 @@ def test_text_search_returns_results(client):
628630
assert r.score > 0
629631
assert isinstance(r.snippet, str)
630632
finally:
633+
client.drop_text_index(idx_name)
631634
client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DELETE n", params={"tag": tag})
632635

633636

@@ -663,17 +666,19 @@ def test_text_search_fuzzy(client):
663666
"""text_search() with fuzzy=True matches approximate terms."""
664667
label = f"FtsFuzzyTest_{uid()}"
665668
tag = uid()
669+
idx_name = f"idx_{label.lower()}"
666670
client.cypher(
667671
f"CREATE (n:{label} {{tag: $tag, body: 'coordinode graph database'}})",
668672
params={"tag": tag},
669673
)
674+
client.create_text_index(idx_name, label, "body")
670675
try:
671-
# "coordinode" with a typo — fuzzy should still match
676+
# "coordinode" with a one-character typo — Levenshtein-1 fuzzy must match.
672677
results = client.text_search(label, "coordinod", fuzzy=True, limit=5)
673678
assert isinstance(results, list)
674-
# May return 0 results if fuzzy is not yet supported or index is cold;
675-
# just verify the call does not raise.
679+
assert results, "fuzzy text_search returned no results after index creation"
676680
finally:
681+
client.drop_text_index(idx_name)
677682
client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DELETE n", params={"tag": tag})
678683

679684

@@ -682,13 +687,15 @@ def test_hybrid_text_vector_search_returns_results(client):
682687
"""hybrid_text_vector_search() returns HybridResult list with RRF scores."""
683688
label = f"FtsHybridTest_{uid()}"
684689
tag = uid()
690+
idx_name = f"idx_{label.lower()}"
685691
vec = [float(i) / 16 for i in range(16)]
686692
# Same node-as-int pattern: RETURN n → Value::Int(node_id) in CoordiNode executor.
687693
rows = client.cypher(
688694
f"CREATE (n:{label} {{tag: $tag, body: 'graph neural network embedding', embedding: $vec}}) RETURN n AS node_id",
689695
params={"tag": tag, "vec": vec},
690696
)
691697
seed_id = rows[0]["node_id"]
698+
client.create_text_index(idx_name, label, "body")
692699
try:
693700
results = client.hybrid_text_vector_search(
694701
label,
@@ -698,7 +705,7 @@ def test_hybrid_text_vector_search_returns_results(client):
698705
)
699706
assert isinstance(results, list)
700707
if not results:
701-
pytest.xfail("hybrid_text_vector_search returned no results — FTS index not available on this server")
708+
pytest.xfail("hybrid_text_vector_search returned no results — vector index not available on this server")
702709
assert any(r.node_id == seed_id for r in results), (
703710
f"seeded node {seed_id} not found in hybrid_text_vector_search results: {results}"
704711
)
@@ -708,4 +715,5 @@ def test_hybrid_text_vector_search_returns_results(client):
708715
assert isinstance(r.score, float)
709716
assert r.score > 0
710717
finally:
718+
client.drop_text_index(idx_name)
711719
client.cypher(f"MATCH (n:{label} {{tag: $tag}}) DETACH DELETE n", params={"tag": tag})

0 commit comments

Comments
 (0)