fix(adapters): raise NotImplementedError for unsupported wildcard patterns

polaz · polaz · commit 19a3b346efbb · 2026-04-09T20:00:26.000+03:00
- get_rel_map: query schema edge types, build typed [r:T1|T2|...] pattern
  instead of wildcard [r] which returns no results in CoordiNode
- get_rel_map: raise NotImplementedError when depth != 1
- get_triplets: raise NotImplementedError when relation_names is None
- add_graph_documents docstring: clarify edges use unconditional CREATE
- _cypher_ident: use \w with re.ASCII instead of [A-Za-z0-9_]
- tests: count(*) instead of count(r) — CoordiNode returns 0 for rel vars
- tests: add relationship to idempotency test, assert edge count >= 1
- tests: assert depth=2 raises NotImplementedError
- tests: fix example port 17080 -> 7080 in docstrings
diff --git a/langchain-coordinode/langchain_coordinode/graph.py b/langchain-coordinode/langchain_coordinode/graph.py
@@ -91,14 +91,18 @@ def add_graph_documents(
     ) -> None:
         """Store nodes and relationships extracted from ``GraphDocument`` objects.
 
-        Nodes are upserted by ``id`` (used as the ``name`` property).
-        Relationships are created between existing nodes; if a relationship
-        between the same source and target already exists it is skipped.
+        Nodes are upserted by ``id`` (used as the ``name`` property) via
+        ``MERGE``, so repeated calls are safe for nodes.
+
+        Relationships are created with unconditional ``CREATE`` because
+        CoordiNode does not yet support ``MERGE`` for edge patterns.  Re-ingesting
+        the same ``GraphDocument`` will therefore produce duplicate edges.
 
         Args:
             graph_documents: List of ``langchain_community.graphs.graph_document.GraphDocument``.
             include_source: If ``True``, also store the source ``Document`` as a
-                ``__Document__`` node linked to every extracted entity.
+                ``__Document__`` node linked to every extracted entity via
+                ``MENTIONS`` edges (also unconditional ``CREATE``).
         """
         for doc in graph_documents:
             # ── Upsert nodes ──────────────────────────────────────────────
@@ -206,8 +210,8 @@ def _stable_document_id(source: Any) -> str:
 
 def _cypher_ident(name: str) -> str:
     """Escape a label/type name for use as a Cypher identifier."""
-    # If already safe (alphanumeric + underscore, not starting with digit) keep as-is
-    if re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name):
+    # ASCII-only word characters: letter/digit/underscore, not starting with digit.
+    if re.match(r"^[A-Za-z_]\w*$", name, re.ASCII):
         return name
     return f"`{name.replace('`', '``')}`"
 
diff --git a/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py b/llama-index-coordinode/llama_index/graph_stores/coordinode/base.py
@@ -131,12 +131,15 @@ def get_triplets(
             rel_filter = "|".join(_cypher_ident(t) for t in relation_names)
             rel_pattern = f"[r:{rel_filter}]"
         else:
-            rel_pattern = "[r]"
+            # CoordiNode: wildcard [r] pattern returns no results.
+            # Callers must supply relation_names for the query to work.
+            raise NotImplementedError(
+                "CoordinodePropertyGraphStore.get_triplets() requires relation_names — "
+                "CoordiNode does not support untyped wildcard [r] patterns"
+            )
 
         where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
         # CoordiNode: use r.__type__ instead of type(r) — type() returns null.
-        # Wildcard [r] pattern also returns no results; caller must supply
-        # relation_names for wildcard queries to work.
         cypher = (
             f"MATCH (n)-{rel_pattern}->(m) {where} "
             "RETURN n, r.__type__ AS rel_type, m, n.id AS _src_id, m.id AS _dst_id "
@@ -165,28 +168,41 @@ def get_rel_map(
         limit: int = 30,
         ignore_rels: list[str] | None = None,
     ) -> list[list[LabelledNode]]:
-        """Get relationship map for a set of nodes up to ``depth`` hops."""
+        """Get relationship map for a set of nodes up to ``depth`` hops.
+
+        Note: only ``depth=1`` (single hop) is supported. ``depth > 1`` raises
+        ``NotImplementedError`` because CoordiNode does not yet serialise
+        variable-length path results.
+        """
+        if depth != 1:
+            raise NotImplementedError(
+                "CoordinodePropertyGraphStore.get_rel_map() currently supports depth=1 only; "
+                "variable-length path queries are not yet available in CoordiNode"
+            )
+
         if not graph_nodes:
             return []
 
-        node_ids = [n.id for n in graph_nodes]
-        ignored = list(ignore_rels) if ignore_rels else []
+        # CoordiNode: wildcard [r] pattern returns no results.  Fetch all
+        # known edge types from the schema and build a typed pattern instead,
+        # e.g. [r:TYPE_A|TYPE_B|...].
+        schema_text = self._client.get_schema_text()
+        edge_types = _parse_edge_types_from_schema(schema_text)
+
+        ignored = set(ignore_rels) if ignore_rels else set()
+        active_types = [t for t in edge_types if t not in ignored]
+
+        if not active_types:
+            return []
 
+        rel_filter = "|".join(_cypher_ident(t) for t in active_types)
+        node_ids = [n.id for n in graph_nodes]
         params: dict[str, object] = {"ids": node_ids}
-        ignore_clause = ""
-        if ignored:
-            # Single-hop [r]: filter with r.__type__ NOT IN $ignored_rels.
-            ignore_clause = " AND NOT r.__type__ IN $ignored_rels"
-            params["ignored_rels"] = ignored
-
-        # CoordiNode does not support variable-length path [r*1..N] in RETURN
-        # position (result serialisation is undefined for path lists).  Use a
-        # single-hop pattern; multi-hop traversal is a future enhancement.
-        _ = depth  # depth parameter reserved; currently single-hop only
+
         cypher = (
-            f"MATCH (n)-[r]->(m) "
-            f"WHERE n.id IN $ids{ignore_clause} "
-            f"RETURN n, r, m, n.id AS _src_id, m.id AS _dst_id "
+            f"MATCH (n)-[r:{rel_filter}]->(m) "
+            f"WHERE n.id IN $ids "
+            f"RETURN n, r.__type__ AS _rel_type, m, n.id AS _src_id, m.id AS _dst_id "
             f"LIMIT {limit}"
         )
         result = self._client.cypher(cypher, params=params)
@@ -197,13 +213,7 @@ def get_rel_map(
             dst_data = row.get("m", {})
             src_id = str(row.get("_src_id", ""))
             dst_id = str(row.get("_dst_id", ""))
-            # Single-hop [r] returns the relationship as a dict.
-            # CoordiNode: use __type__ key — type() returns null.
-            r_val = row.get("r", {})
-            if isinstance(r_val, dict):
-                rel_label = r_val.get("__type__") or r_val.get("type") or "RELATED"
-            else:
-                rel_label = "RELATED"
+            rel_label = str(row.get("_rel_type") or "RELATED")
             src = _node_result_to_labelled(src_id, src_data)
             dst = _node_result_to_labelled(dst_id, dst_data)
             rel = Relation(label=rel_label, source_id=src_id, target_id=dst_id)
@@ -357,3 +367,25 @@ def _node_label(node: LabelledNode) -> str:
     if isinstance(node, EntityNode):
         return node.label or "Entity"
     return "Node"
+
+
+def _parse_edge_types_from_schema(schema_text: str) -> list[str]:
+    """Extract edge type names from CoordiNode schema text.
+
+    Parses the "Edge types:" section produced by ``get_schema_text()``.
+    """
+    edge_types: list[str] = []
+    in_edges = False
+    for line in schema_text.splitlines():
+        stripped = line.strip()
+        if stripped.lower().startswith("edge types"):
+            in_edges = True
+            continue
+        if in_edges:
+            if not stripped:
+                break
+            if stripped.startswith("-") or stripped.startswith("*"):
+                name = stripped.lstrip("-* ").split("(")[0].strip()
+                if name:
+                    edge_types.append(name)
+    return edge_types
diff --git a/tests/integration/adapters/test_langchain.py b/tests/integration/adapters/test_langchain.py
@@ -4,7 +4,7 @@
 (default: localhost:7080).
 
 Run via:
-    COORDINODE_ADDR=localhost:17080 pytest tests/integration/adapters/test_langchain.py -v
+    COORDINODE_ADDR=localhost:7080 pytest tests/integration/adapters/test_langchain.py -v
 """
 
 import os
@@ -97,27 +97,47 @@ def test_add_graph_documents_creates_relationship(graph, unique_tag):
     graph.add_graph_documents([doc])
 
     # Verify the relationship was created, not just the source node.
+    # count(*) instead of count(r): CoordiNode returns 0 for relationship-variable counts
     result = graph.query(
-        "MATCH (a:LCPerson2 {name: $src})-[r:LC_RESEARCHES]->(b:LCConcept {name: $dst}) RETURN count(r) AS cnt",
+        "MATCH (a:LCPerson2 {name: $src})-[r:LC_RESEARCHES]->(b:LCConcept {name: $dst}) RETURN count(*) AS cnt",
         params={"src": f"Charlie-{unique_tag}", "dst": f"GraphRAG-{unique_tag}"},
     )
     assert result[0]["cnt"] >= 1, f"relationship not found: {result}"
 
 
 def test_add_graph_documents_idempotent(graph, unique_tag):
-    """Calling add_graph_documents twice must not raise."""
-    node = Node(id=f"Idempotent-{unique_tag}", type="LCIdempotent")
-    doc = GraphDocument(nodes=[node], relationships=[], source=Document(page_content="test"))
+    """Calling add_graph_documents twice must not raise.
+
+    Nodes are idempotent (MERGE).  Edges are NOT — CoordiNode does not yet
+    support MERGE for edges, so unconditional CREATE is used and duplicate
+    edges are expected after two ingests.
+    """
+    node_a = Node(id=f"Idempotent-{unique_tag}", type="LCIdempotent")
+    node_b = Node(id=f"IdempTarget-{unique_tag}", type="LCIdempotent")
+    rel = Relationship(source=node_a, target=node_b, type="LC_IDEMP_REL")
+    doc = GraphDocument(
+        nodes=[node_a, node_b],
+        relationships=[rel],
+        source=Document(page_content="test"),
+    )
 
     graph.add_graph_documents([doc])
     graph.add_graph_documents([doc])  # second call must not raise
 
+    # Nodes: MERGE keeps count at 1
     result = graph.query(
-        "MATCH (n:LCIdempotent {name: $name}) RETURN count(n) AS cnt",
+        "MATCH (n:LCIdempotent {name: $name}) RETURN count(*) AS cnt",
         params={"name": f"Idempotent-{unique_tag}"},
     )
     assert result[0]["cnt"] == 1
 
+    # Edges: unconditional CREATE → count >= 1 (may be > 1 due to CoordiNode limitation)
+    result = graph.query(
+        "MATCH (a:LCIdempotent {name: $src})-[r:LC_IDEMP_REL]->(b:LCIdempotent {name: $dst}) RETURN count(*) AS cnt",
+        params={"src": f"Idempotent-{unique_tag}", "dst": f"IdempTarget-{unique_tag}"},
+    )
+    assert result[0]["cnt"] >= 1
+
 
 def test_schema_refreshes_after_add(graph, unique_tag):
     """structured_schema is invalidated and re-fetched after add_graph_documents."""
diff --git a/tests/integration/adapters/test_llama_index.py b/tests/integration/adapters/test_llama_index.py
@@ -4,7 +4,7 @@
 (default: localhost:7080).
 
 Run via:
-    COORDINODE_ADDR=localhost:17080 pytest tests/integration/adapters/test_llama_index.py -v
+    COORDINODE_ADDR=localhost:7080 pytest tests/integration/adapters/test_llama_index.py -v
 """
 
 import os
@@ -122,6 +122,15 @@ def test_get_rel_map(store, tag):
     assert len(result) >= 1
 
 
+def test_get_rel_map_depth_gt1_raises(store, tag):
+    """depth > 1 must raise NotImplementedError until multi-hop is supported."""
+    node = EntityNode(label="LIRelMapDepth", name=f"DepthNode-{tag}")
+    store.upsert_nodes([node])
+
+    with pytest.raises(NotImplementedError):
+        store.get_rel_map([node], depth=2, limit=10)
+
+
 # ── Delete ────────────────────────────────────────────────────────────────────