From b5ea28dacc065f6daf1b4976b27a6fc90913da09 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 18:12:39 -0700
Subject: [PATCH 1/6] feat(ontology): add --skip-property-graph for user-owned
 graph DDL (#104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lets users with their own CREATE PROPERTY GRAPH DDL — managed by
Terraform, dbt, or hand-authored — populate base tables from BQ AA
traces without overwriting the graph object on every run.

Changes
- ontology_orchestrator.build_ontology_graph gains
  skip_property_graph: bool = False. When True, phase 5 is not
  invoked: no OntologyPropertyGraphCompiler is constructed, no
  CREATE OR REPLACE PROPERTY GRAPH runs.
- Result dict gains property_graph_status with values "created" /
  "failed" / "skipped:user_requested", plus skipped_reason
  ("user_requested") when phase 5 was skipped.
- ontology-build CLI gains --skip-property-graph and threads
  property_graph_status through to the curated output dict so JSON
  consumers can distinguish "skipped" from "failed" without parsing
  stderr.
- Exit handling: skipped_reason == "user_requested" exits 0 without
  printing the failure message (the result dict is still emitted);
  the existing exit-1-with-error behavior is preserved for actual
  graph-creation failures.

Tests
- test_skip_property_graph_does_not_construct_compiler asserts the
  compiler class is never called (mock.assert_not_called) when the
  flag is set.
- test_property_graph_status_created_on_success and
  test_property_graph_status_failed_on_compiler_false cover the two
  default-mode status values.
- CLI tests cover exit 0 with status="skipped:user_requested",
  default skip_property_graph=False threading, and exit 1 with
  status="failed" on actual creation failure.

135/135 tests in test_ontology_orchestrator.py + test_cli.py pass.
---
 src/bigquery_agent_analytics/cli.py           |  21 ++++
 .../ontology_orchestrator.py                  |  52 ++++++---
 tests/test_cli.py                             | 108 ++++++++++++++++++
 tests/test_ontology_orchestrator.py           | 104 +++++++++++++++++
 4 files changed, 272 insertions(+), 13 deletions(-)

diff --git a/src/bigquery_agent_analytics/cli.py b/src/bigquery_agent_analytics/cli.py
index 8e1ac4d..d02a4aa 100644
--- a/src/bigquery_agent_analytics/cli.py
+++ b/src/bigquery_agent_analytics/cli.py
@@ -1238,6 +1238,16 @@ def ontology_build(
     no_ai_generate: bool = typer.Option(
         False, help="Skip AI.GENERATE; fetch raw payloads instead."
     ),
+    skip_property_graph: bool = typer.Option(
+        False,
+        "--skip-property-graph",
+        help=(
+            "Skip CREATE OR REPLACE PROPERTY GRAPH. Use when the caller "
+            "owns their own property-graph DDL and only wants the SDK to "
+            "populate base tables. CLI exits 0 with "
+            "property_graph_status='skipped:user_requested'."
+        ),
+    ),
     fmt: str = typer.Option(
         "json",
         "--format",
@@ -1261,6 +1271,7 @@ def ontology_build(
         table_id=table_id,
         endpoint=endpoint,
         use_ai_generate=not no_ai_generate,
+        skip_property_graph=skip_property_graph,
     )
 
     output = {
@@ -1271,9 +1282,19 @@ def ontology_build(
         "tables_created": result["tables_created"],
         "rows_materialized": result["rows_materialized"],
         "property_graph_created": result["property_graph_created"],
+        "property_graph_status": result.get(
+            "property_graph_status",
+            "created" if result["property_graph_created"] else "failed",
+        ),
     }
     typer.echo(format_output(output, fmt))
 
+    # Distinguish "user-requested skip" (exit 0) from "creation failed"
+    # (exit 1). Same property_graph_created=False, different operator
+    # intent — JSON consumers read property_graph_status to tell them
+    # apart without parsing stderr.
+    if result.get("skipped_reason") == "user_requested":
+      return
     if not result["property_graph_created"]:
       typer.echo(
           "Error: Property Graph creation failed. "
diff --git a/src/bigquery_agent_analytics/ontology_orchestrator.py b/src/bigquery_agent_analytics/ontology_orchestrator.py
index cc2f43b..f82e3c0 100644
--- a/src/bigquery_agent_analytics/ontology_orchestrator.py
+++ b/src/bigquery_agent_analytics/ontology_orchestrator.py
@@ -300,6 +300,7 @@ def build_ontology_graph(
     endpoint: str = "gemini-2.5-flash",
     use_ai_generate: bool = True,
     location: Optional[str] = None,
+    skip_property_graph: bool = False,
 ) -> dict[str, Any]:
   """Run the full ontology graph pipeline end-to-end.
 
@@ -307,7 +308,8 @@ def build_ontology_graph(
   2. Extract an ``ExtractedGraph`` from agent telemetry.
   3. Create physical tables (if not exists).
   4. Materialize extracted nodes/edges into tables.
-  5. Create the BigQuery Property Graph.
+  5. Create the BigQuery Property Graph (skipped when
+     ``skip_property_graph=True``).
 
   Args:
       session_ids: Sessions to extract from.
@@ -323,10 +325,22 @@ def build_ontology_graph(
       endpoint: AI.GENERATE model endpoint.
       use_ai_generate: If True, uses server-side AI extraction.
       location: BigQuery location.
+      skip_property_graph: When True, skip phase 5 (do not run
+          ``CREATE OR REPLACE PROPERTY GRAPH``). Use this when the
+          caller owns their own property-graph DDL and only wants
+          the SDK to populate base tables. The result dict reports
+          ``property_graph_created=False`` with
+          ``skipped_reason="user_requested"`` and
+          ``property_graph_status="skipped:user_requested"``, which
+          callers (and the CLI) use to distinguish a deliberate
+          skip from a creation failure.
 
   Returns:
       A dict with keys: ``spec``, ``graph``, ``tables_created``,
       ``rows_materialized``, ``property_graph_created``,
+      ``property_graph_status`` (one of ``"created"``, ``"failed"``,
+      ``"skipped:user_requested"``), ``skipped_reason`` (only set
+      when phase 5 was skipped, e.g. ``"user_requested"``),
       ``graph_name``, ``graph_ref``.
   """
   from .ontology_graph import OntologyGraphManager
@@ -391,24 +405,36 @@ def build_ontology_graph(
   rows_materialized = materializer.materialize(graph, session_ids)
   logger.info("Rows materialized: %s", rows_materialized)
 
-  # 5. Create property graph.
-  compiler = OntologyPropertyGraphCompiler(
-      project_id=project_id,
-      dataset_id=dataset_id,
-      spec=spec,
-      location=location,
-  )
-  pg_created = compiler.create_property_graph(graph_name=name)
-
   graph_ref = f"{project_id}.{dataset_id}.{name}"
-  logger.info("Property Graph %r created=%s.", graph_ref, pg_created)
 
-  return {
+  # 5. Create property graph (or skip when caller owns the DDL).
+  result: dict[str, Any] = {
       "spec": spec,
       "graph": graph,
       "tables_created": tables_created,
       "rows_materialized": rows_materialized,
-      "property_graph_created": pg_created,
       "graph_name": name,
       "graph_ref": graph_ref,
   }
+  if skip_property_graph:
+    logger.info(
+        "Property Graph creation skipped (skip_property_graph=True); "
+        "caller owns the DDL for graph %r.",
+        graph_ref,
+    )
+    result["property_graph_created"] = False
+    result["skipped_reason"] = "user_requested"
+    result["property_graph_status"] = "skipped:user_requested"
+  else:
+    compiler = OntologyPropertyGraphCompiler(
+        project_id=project_id,
+        dataset_id=dataset_id,
+        spec=spec,
+        location=location,
+    )
+    pg_created = compiler.create_property_graph(graph_name=name)
+    logger.info("Property Graph %r created=%s.", graph_ref, pg_created)
+    result["property_graph_created"] = pg_created
+    result["property_graph_status"] = "created" if pg_created else "failed"
+
+  return result
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 15362dd..a94e599 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -2472,3 +2472,111 @@ def test_bad_spec_path_exit_2(self):
         ],
     )
     assert result.exit_code == 2
+
+  @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
+  def test_skip_property_graph_exits_zero_with_status(self, mock_build):
+    """--skip-property-graph: exit 0, status='skipped:user_requested'."""
+    from bigquery_agent_analytics.ontology_models import ExtractedGraph
+
+    mock_build.return_value = {
+        "graph_name": "g",
+        "graph_ref": "proj.ds.g",
+        "graph": ExtractedGraph(name="test"),
+        "tables_created": {"mako_DecisionPoint": "p.d.decision_points"},
+        "rows_materialized": {"mako_DecisionPoint": 2},
+        "property_graph_created": False,
+        "skipped_reason": "user_requested",
+        "property_graph_status": "skipped:user_requested",
+        "spec": MagicMock(),
+    }
+
+    result = runner.invoke(
+        app,
+        [
+            "ontology-build",
+            "--project-id=proj",
+            "--dataset-id=ds",
+            f"--spec-path={self._SPEC_PATH}",
+            "--session-ids=sess1",
+            "--env=p.d",
+            "--skip-property-graph",
+        ],
+    )
+    assert result.exit_code == 0
+    # Skip path must NOT print the "Property Graph creation failed" stderr.
+    assert "Property Graph creation failed" not in result.output
+    parsed = json.loads(result.output)
+    assert parsed["property_graph_created"] is False
+    assert parsed["property_graph_status"] == "skipped:user_requested"
+
+    # Flag is threaded through to the orchestrator.
+    _, kwargs = mock_build.call_args
+    assert kwargs["skip_property_graph"] is True
+
+  @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
+  def test_default_invocation_omits_skip_flag(self, mock_build):
+    """Default invocation passes skip_property_graph=False."""
+    from bigquery_agent_analytics.ontology_models import ExtractedGraph
+
+    mock_build.return_value = {
+        "graph_name": "g",
+        "graph_ref": "proj.ds.g",
+        "graph": ExtractedGraph(name="test"),
+        "tables_created": {},
+        "rows_materialized": {},
+        "property_graph_created": True,
+        "property_graph_status": "created",
+        "spec": MagicMock(),
+    }
+
+    result = runner.invoke(
+        app,
+        [
+            "ontology-build",
+            "--project-id=proj",
+            "--dataset-id=ds",
+            f"--spec-path={self._SPEC_PATH}",
+            "--session-ids=sess1",
+            "--env=p.d",
+        ],
+    )
+    assert result.exit_code == 0
+    parsed = json.loads(result.output)
+    assert parsed["property_graph_status"] == "created"
+
+    _, kwargs = mock_build.call_args
+    assert kwargs["skip_property_graph"] is False
+
+  @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
+  def test_property_graph_failure_status_failed(self, mock_build):
+    """When the orchestrator reports failure, exit 1 with status='failed'.
+
+    Distinguishes the failure path from the user-requested-skip path by
+    asserting the exit code and the printed failure message.
+    """
+    from bigquery_agent_analytics.ontology_models import ExtractedGraph
+
+    mock_build.return_value = {
+        "graph_name": "g",
+        "graph_ref": "proj.ds.g",
+        "graph": ExtractedGraph(name="test"),
+        "tables_created": {},
+        "rows_materialized": {},
+        "property_graph_created": False,
+        "property_graph_status": "failed",
+        "spec": MagicMock(),
+    }
+
+    result = runner.invoke(
+        app,
+        [
+            "ontology-build",
+            "--project-id=proj",
+            "--dataset-id=ds",
+            f"--spec-path={self._SPEC_PATH}",
+            "--session-ids=sess1",
+            "--env=p.d",
+        ],
+    )
+    assert result.exit_code == 1
+    assert "Property Graph creation failed" in result.output
diff --git a/tests/test_ontology_orchestrator.py b/tests/test_ontology_orchestrator.py
index 11800c1..f677a20 100644
--- a/tests/test_ontology_orchestrator.py
+++ b/tests/test_ontology_orchestrator.py
@@ -469,3 +469,107 @@ def test_partial_table_creation_raises(
     # Materialize and property graph should NOT have been called.
     mock_mat_cls.return_value.materialize.assert_not_called()
     mock_pg_cls.return_value.create_property_graph.assert_not_called()
+
+  @patch(
+      "bigquery_agent_analytics.ontology_property_graph"
+      ".OntologyPropertyGraphCompiler"
+  )
+  @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer")
+  @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager")
+  def test_skip_property_graph_does_not_construct_compiler(
+      self, mock_mgr_cls, mock_mat_cls, mock_pg_cls
+  ):
+    """When skip_property_graph=True, the compiler is never constructed."""
+    mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph(
+        name="test"
+    )
+    mock_mat_cls.return_value.create_tables.return_value = dict(
+        _ALL_YMGO_TABLES
+    )
+    mock_mat_cls.return_value.materialize.return_value = {}
+
+    result = build_ontology_graph(
+        session_ids=["sess1"],
+        spec_path=_DEMO_SPEC_PATH,
+        project_id="proj",
+        dataset_id="ds",
+        env="p.d",
+        skip_property_graph=True,
+    )
+
+    # Compiler must not be constructed and create_property_graph must
+    # not be called when skip_property_graph=True.
+    mock_pg_cls.assert_not_called()
+    mock_pg_cls.return_value.create_property_graph.assert_not_called()
+
+    # Tables and rows still produced.
+    mock_mat_cls.return_value.create_tables.assert_called_once()
+    mock_mat_cls.return_value.materialize.assert_called_once()
+
+    # Result reports the skip distinctly from a creation failure.
+    assert result["property_graph_created"] is False
+    assert result["skipped_reason"] == "user_requested"
+    assert result["property_graph_status"] == "skipped:user_requested"
+
+  @patch(
+      "bigquery_agent_analytics.ontology_property_graph"
+      ".OntologyPropertyGraphCompiler"
+  )
+  @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer")
+  @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager")
+  def test_property_graph_status_created_on_success(
+      self, mock_mgr_cls, mock_mat_cls, mock_pg_cls
+  ):
+    """Default flow with successful graph creation reports 'created'."""
+    mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph(
+        name="test"
+    )
+    mock_mat_cls.return_value.create_tables.return_value = dict(
+        _ALL_YMGO_TABLES
+    )
+    mock_mat_cls.return_value.materialize.return_value = {}
+    mock_pg_cls.return_value.create_property_graph.return_value = True
+
+    result = build_ontology_graph(
+        session_ids=["sess1"],
+        spec_path=_DEMO_SPEC_PATH,
+        project_id="proj",
+        dataset_id="ds",
+        env="p.d",
+    )
+
+    assert result["property_graph_created"] is True
+    assert result["property_graph_status"] == "created"
+    assert "skipped_reason" not in result
+
+  @patch(
+      "bigquery_agent_analytics.ontology_property_graph"
+      ".OntologyPropertyGraphCompiler"
+  )
+  @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer")
+  @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager")
+  def test_property_graph_status_failed_on_compiler_false(
+      self, mock_mgr_cls, mock_mat_cls, mock_pg_cls
+  ):
+    """Default flow where create_property_graph returns False reports
+    'failed' (distinct from 'skipped:user_requested')."""
+    mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph(
+        name="test"
+    )
+    mock_mat_cls.return_value.create_tables.return_value = dict(
+        _ALL_YMGO_TABLES
+    )
+    mock_mat_cls.return_value.materialize.return_value = {}
+    mock_pg_cls.return_value.create_property_graph.return_value = False
+
+    result = build_ontology_graph(
+        session_ids=["sess1"],
+        spec_path=_DEMO_SPEC_PATH,
+        project_id="proj",
+        dataset_id="ds",
+        env="p.d",
+    )
+
+    assert result["property_graph_created"] is False
+    assert result["property_graph_status"] == "failed"
+    assert "skipped_reason" not in result

From 18548ee0339c70db4c67e3b8314e0b6fa5ab4241 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 18:31:20 -0700
Subject: [PATCH 2/6] docs+test: ontology-build doc + live skip-property-graph
 test (#104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Closes the two #104 acceptance gaps flagged on PR #108 review:

(1) Docs missing
- New docs/ontology/ontology-build.md documents the bq-agent-sdk
  ontology-build orchestrator end-to-end and the new
  --skip-property-graph flag.
- Includes a status-field reference table mapping
  property_graph_status (created / failed / skipped:user_requested)
  to property_graph_created and CLI exit code.
- Includes Python API example showing skip_property_graph=True with
  expected result-dict shape.

(2) No gated live integration test
- New TestSkipPropertyGraph class in
  tests/test_integration_ontology_binding.py.
- Gated on RUN_LIVE_BIGQUERY_TESTS=1 like the existing live tests.
- Sequence: pre-create the property graph by calling
  OntologyPropertyGraphCompiler.create_property_graph() outside the
  orchestrator (standing in for Terraform/dbt-managed DDL), capture
  the post-DDL
  CURRENT_TIMESTAMP(), run build_ontology_graph(...,
  skip_property_graph=True), then query JOBS_BY_PROJECT for any
  'CREATE OR REPLACE PROPERTY GRAPH' jobs in the post-timestamp
  window — assert zero. Also re-runs the showcase GQL query to
  confirm the user's graph object still works after the SDK run.
- The timestamp is captured AFTER the authored DDL specifically to
  avoid the false-positive trap called out in #107 cell 1.3.
---
 docs/ontology/ontology-build.md            |  84 +++++++++++++
 tests/test_integration_ontology_binding.py | 131 +++++++++++++++++++++
 2 files changed, 215 insertions(+)
 create mode 100644 docs/ontology/ontology-build.md

diff --git a/docs/ontology/ontology-build.md b/docs/ontology/ontology-build.md
new file mode 100644
index 0000000..b706735
--- /dev/null
+++ b/docs/ontology/ontology-build.md
@@ -0,0 +1,84 @@
+# `bq-agent-sdk ontology-build` — End-to-End Orchestrator
+
+`bq-agent-sdk ontology-build` runs the SDK's full ontology pipeline end-to-end against a populated `agent_events` table:
+
+1. Load the spec (`--ontology X.yaml --binding Y.yaml`).
+2. Extract an `ExtractedGraph` from agent telemetry via `AI.GENERATE`.
+3. Create physical entity/relationship tables (`CREATE TABLE IF NOT EXISTS`).
+4. Materialize extracted nodes/edges into those tables.
+5. Run `CREATE OR REPLACE PROPERTY GRAPH` to wire the BigQuery property graph object.
+
+The Python entry point is `bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph(...)`. The CLI is a thin wrapper.
+
+## Skipping property-graph DDL
+
+Use `--skip-property-graph` when **the caller owns their own `CREATE PROPERTY GRAPH` DDL** — e.g., the property graph is provisioned via Terraform, dbt, or hand-authored SQL — and only wants the SDK to populate base tables.
+
+```
+bq-agent-sdk ontology-build \
+  --project-id my-project \
+  --dataset-id my-dataset \
+  --ontology my.ontology.yaml \
+  --binding my-bq-prod.binding.yaml \
+  --session-ids sess-1,sess-2 \
+  --skip-property-graph
+```
+
+Behavior with the flag set:
+
+- Phase 5 short-circuits. No `OntologyPropertyGraphCompiler` is constructed, no `CREATE OR REPLACE PROPERTY GRAPH` job runs. The user's existing graph object is unchanged.
+- Phases 1–4 run normally. Tables are created (`CREATE TABLE IF NOT EXISTS` is a no-op against pre-existing tables) and rows are materialized.
+- The CLI exits 0.
+- The output dict reports:
+
+  ```json
+  {
+    "property_graph_created": false,
+    "property_graph_status": "skipped:user_requested",
+    ...
+  }
+  ```
+
+  JSON consumers should read `property_graph_status` (not just `property_graph_created`) to distinguish a deliberate skip from a creation failure.
+
+## Status field reference
+
+The CLI's `property_graph_status` field has three values:
+
+| `property_graph_status` | `property_graph_created` | Exit code | Meaning |
+|---|---|---|---|
+| `"created"` | `true` | 0 | Phase 5 ran and BigQuery confirmed the graph object. |
+| `"failed"` | `false` | 1 | Phase 5 ran but the graph object was not created. The CLI prints "Property Graph creation failed" to stderr. Tables and rows were still materialized. |
+| `"skipped:user_requested"` | `false` | 0 | `--skip-property-graph` was set. Phase 5 did not run. No error message. |
+
+Without `--skip-property-graph`, the existing exit-1 behavior on graph-create failure is preserved exactly.
+
+## When to use this
+
+- **You already manage `CREATE PROPERTY GRAPH` in Terraform / dbt / a SQL file.** The SDK's `CREATE OR REPLACE PROPERTY GRAPH` would clobber your DDL on every run.
+- **Your property graph definition uses options the SDK doesn't generate.** You hand-authored the graph DDL to express features (custom labels, additional indexes, dialect-specific options) the SDK's compiler doesn't emit.
+- **You want to populate your tables on a different cadence than you redefine the graph.** The graph definition rarely changes; the data is refreshed continuously.
+
+For all other cases, leave the flag off and let the SDK manage the property graph end-to-end.
+
+## Python API
+
+The flag is also available on `build_ontology_graph(...)`:
+
+```python
+from bigquery_agent_analytics.ontology_orchestrator import build_ontology_graph
+
+result = build_ontology_graph(
+    spec=resolved_spec,
+    session_ids=["sess-1"],
+    project_id="my-project",
+    dataset_id="my-dataset",
+    skip_property_graph=True,  # phase 5 skipped
+)
+
+assert result["property_graph_status"] == "skipped:user_requested"
+assert result["skipped_reason"] == "user_requested"
+assert result["property_graph_created"] is False
+```
+
+`skipped_reason` is only present when the phase was skipped; it is omitted when phase 5 ran (whether or not it succeeded).
diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py
index 808cc62..c056234 100644
--- a/tests/test_integration_ontology_binding.py
+++ b/tests/test_integration_ontology_binding.py
@@ -328,6 +328,137 @@ def test_create_graph_and_query(
     assert len(rows) > 0, "GQL query returned 0 rows"
 
 
+class TestSkipPropertyGraph:
+  """Live test that --skip-property-graph does not run CREATE PROPERTY GRAPH.
+
+  Issue #104 acceptance: "creates a pre-existing property graph, runs
+  ontology-build --skip-property-graph against pre-existing base tables,
+  and verifies the user's graph definition is unchanged after the run."
+
+  Verified by:
+    - Capturing a timestamp after creating the user's CREATE PROPERTY
+      GRAPH directly (not via the SDK).
+    - Running build_ontology_graph(..., skip_property_graph=True).
+    - Querying INFORMATION_SCHEMA.JOBS_BY_PROJECT for any
+      'CREATE OR REPLACE PROPERTY GRAPH' jobs in the post-timestamp
+      window. Asserting zero.
+    - Asserting the GQL query against the user's graph still works
+      after the SDK run (graph object intact, base tables refreshed).
+  """
+
+  def test_skip_property_graph_issues_no_create_graph_job(
+      self, ontology_and_binding, lineage_config, scratch_dataset
+  ):
+    from google.cloud import bigquery
+
+    from bigquery_agent_analytics.ontology_materializer import (
+        OntologyMaterializer,
+    )
+    from bigquery_agent_analytics.ontology_orchestrator import (
+        build_ontology_graph,
+    )
+    from bigquery_agent_analytics.ontology_orchestrator import (
+        compile_showcase_gql,
+    )
+    from bigquery_agent_analytics.ontology_property_graph import (
+        OntologyPropertyGraphCompiler,
+    )
+    from bigquery_agent_analytics.resolved_spec import resolve
+
+    ontology, binding = ontology_and_binding
+    spec = resolve(ontology, binding, lineage_config=lineage_config)
+
+    # Step 1: create base tables (idempotent), then pre-create the
+    # property graph with the compiler (standing in for Terraform/dbt-
+    # managed DDL the SDK must NOT touch when --skip-property-graph is set).
+    mat = OntologyMaterializer.from_ontology_binding(
+        ontology=ontology,
+        binding=binding,
+        lineage_config=lineage_config,
+        write_mode="batch_load",
+    )
+    mat.create_tables()
+
+    compiler = OntologyPropertyGraphCompiler.from_ontology_binding(
+        ontology=ontology,
+        binding=binding,
+        lineage_config=lineage_config,
+    )
+    assert compiler.create_property_graph() is True
+
+    # Step 2: capture the "before" timestamp AFTER the authored DDL
+    # has finished so the JOBS_BY_PROJECT filter does not catch our
+    # own setup job. Bind via a SQL CURRENT_TIMESTAMP() round-trip so
+    # the timestamp is BQ-aligned.
+    client = bigquery.Client(project=_PROJECT, location=_LOCATION)
+    before_ts_row = next(
+        iter(client.query("SELECT CURRENT_TIMESTAMP() AS ts").result())
+    )
+    before_skip_build_ts = before_ts_row.ts
+
+    # Step 3: run build_ontology_graph with skip_property_graph=True.
+    result = build_ontology_graph(
+        spec=spec,
+        session_ids=[_SESSION],
+        project_id=_PROJECT,
+        dataset_id=scratch_dataset,
+        graph_name=spec.name,
+        location=_LOCATION,
+        skip_property_graph=True,
+    )
+
+    assert result["property_graph_created"] is False
+    assert result["property_graph_status"] == "skipped:user_requested"
+    assert result["skipped_reason"] == "user_requested"
+
+    # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran in
+    # the post-timestamp window.
+    region_qual = f"`region-{_LOCATION.lower()}`"
+    jobs_query = f"""
+    SELECT job_id, query, creation_time
+    FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+    WHERE creation_time > @before
+      AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%'
+    """
+    job = client.query(
+        jobs_query,
+        job_config=bigquery.QueryJobConfig(
+            query_parameters=[
+                bigquery.ScalarQueryParameter(
+                    "before", "TIMESTAMP", before_skip_build_ts
+                ),
+            ]
+        ),
+    )
+    create_graph_jobs = list(job.result())
+    assert len(create_graph_jobs) == 0, (
+        "Expected zero CREATE OR REPLACE PROPERTY GRAPH jobs after "
+        f"build_ontology_graph(skip_property_graph=True), got "
+        f"{len(create_graph_jobs)}: "
+        f"{[j.job_id for j in create_graph_jobs]}"
+    )
+
+    # Step 5: assert the user's graph object still works. Run the
+    # showcase GQL query — it should succeed (graph definition is
+    # intact) even though it may return zero rows if the test
+    # session_id has no matching edges in this scratch dataset.
+    gql = compile_showcase_gql(spec, _PROJECT, scratch_dataset)
+    gql_job = client.query(
+        gql,
+        job_config=bigquery.QueryJobConfig(
+            query_parameters=[
+                bigquery.ScalarQueryParameter(
+                    "session_id", "STRING", _SESSION
+                ),
+                bigquery.ScalarQueryParameter("result_limit", "INT64", 50),
+            ]
+        ),
+    )
+    # Result iteration confirms BigQuery accepted the GQL against
+    # the user's pre-existing property graph.
+    list(gql_job.result())
+
+
 class TestLineageEndToEnd:
   """Live lineage detection + GQL via from_ontology_binding."""
 

From 548046570a58313ec858886b323bc317e5277a40 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 18:43:05 -0700
Subject: [PATCH 3/6] test+docs: harden live test, add text-format check, link
 doc (#104)

Addresses four review findings on PR #108:

(1) Live test now exercises real extraction/materialization
- Pass dataset_id=_DATASET, table_id=_TABLE so extraction reads
  the production agent_events table where YMGO ADCP session data
  lives. Materializer still writes to scratch_dataset because spec
  entity sources arrive 3-part-qualified to binding.target.dataset
  via _qualify_source (resolved_spec.py:141).
- Assert sum(rows_materialized.values()) > 0 to catch the silent-
  empty-graph trap where ontology_graph.py:683 returns an empty
  ExtractedGraph if extraction fails (e.g. wrong source dataset).

(2) JOBS_BY_PROJECT assertion narrowed to the test's own graph
- Filter by both 'CREATE OR REPLACE PROPERTY GRAPH' keyword AND
  the fully-qualified graph reference
  ({_PROJECT}.{scratch_dataset}.{spec.name}). Prevents false-fail
  on unrelated CREATE OR REPLACE PROPERTY GRAPH jobs running
  concurrently in the same project from other tests/developers.

(3) docs/README.md gains a row for the new ontology-build doc.

(4) New CLI test test_skip_property_graph_status_visible_in_text_format
asserts property_graph_status appears in --format=text output, pinning
the contract that the status field is not JSON-only.

7/7 ontology-build CLI tests pass.
---
 docs/README.md                             |  1 +
 tests/test_cli.py                          | 42 ++++++++++++++++++++++
 tests/test_integration_ontology_binding.py | 36 +++++++++++++++++--
 3 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index f8bbac0..dcb9d3c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -36,6 +36,7 @@ architecture, rationale, and implementation plans behind key SDK features.
 | [ontology/compilation.md](ontology/compilation.md) | Compilation — resolving ontology + binding into backend DDL |
 | [ontology/cli.md](ontology/cli.md) | CLI design for the `gm` tool (validate, compile, import-owl) |
 | [ontology/owl-import.md](ontology/owl-import.md) | OWL import — converting OWL ontologies to YAML format |
+| [ontology/ontology-build.md](ontology/ontology-build.md) | `bq-agent-sdk ontology-build` orchestrator + `--skip-property-graph` reference |
 
 ## Deployment Surfaces
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index a94e599..4564b99 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -2547,6 +2547,48 @@ def test_default_invocation_omits_skip_flag(self, mock_build):
     _, kwargs = mock_build.call_args
     assert kwargs["skip_property_graph"] is False
 
+  @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
+  def test_skip_property_graph_status_visible_in_text_format(
+      self, mock_build
+  ):
+    """--format=text exposes property_graph_status to non-JSON consumers.
+
+    Pins the contract that property_graph_status is not JSON-only:
+    --format=table renders dict keys; --format=text falls back to a
+    readable representation. The status string must appear in either.
+    """
+    from bigquery_agent_analytics.ontology_models import ExtractedGraph
+
+    mock_build.return_value = {
+        "graph_name": "g",
+        "graph_ref": "proj.ds.g",
+        "graph": ExtractedGraph(name="test"),
+        "tables_created": {},
+        "rows_materialized": {},
+        "property_graph_created": False,
+        "skipped_reason": "user_requested",
+        "property_graph_status": "skipped:user_requested",
+        "spec": MagicMock(),
+    }
+
+    result = runner.invoke(
+        app,
+        [
+            "ontology-build",
+            "--project-id=proj",
+            "--dataset-id=ds",
+            f"--spec-path={self._SPEC_PATH}",
+            "--session-ids=sess1",
+            "--env=p.d",
+            "--skip-property-graph",
+            "--format=text",
+        ],
+    )
+    assert result.exit_code == 0
+    # The status string must appear in the text-format output so non-
+    # JSON consumers can see why the graph was not created.
+    assert "skipped:user_requested" in result.output
+
   @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
   def test_property_graph_failure_status_failed(self, mock_build):
     """When the orchestrator reports failure, exit 1 with status='failed'.
diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py
index c056234..79a28b6 100644
--- a/tests/test_integration_ontology_binding.py
+++ b/tests/test_integration_ontology_binding.py
@@ -397,11 +397,20 @@ def test_skip_property_graph_issues_no_create_graph_job(
     before_skip_build_ts = before_ts_row.ts
 
     # Step 3: run build_ontology_graph with skip_property_graph=True.
+    # Extraction reads from the real _DATASET.agent_events table where
+    # the YMGO ADCP session data lives. Materialization writes to
+    # scratch_dataset because spec entity sources are already
+    # 3-part-qualified to binding.target.dataset = scratch_dataset
+    # (see _qualify_source at resolved_spec.py:141), so the
+    # materializer ignores its dataset_id parameter for output table
+    # location. The result: extract from prod-like, materialize to
+    # scratch — exactly the user-facing flow the test should exercise.
     result = build_ontology_graph(
         spec=spec,
         session_ids=[_SESSION],
         project_id=_PROJECT,
-        dataset_id=scratch_dataset,
+        dataset_id=_DATASET,
+        table_id=_TABLE,
         graph_name=spec.name,
         location=_LOCATION,
         skip_property_graph=True,
@@ -410,15 +419,31 @@ def test_skip_property_graph_issues_no_create_graph_job(
     assert result["property_graph_created"] is False
     assert result["property_graph_status"] == "skipped:user_requested"
     assert result["skipped_reason"] == "user_requested"
+    # Phases 1-4 must have actually populated the scratch tables.
+    # Catches the silent-empty-graph trap where extraction can fail
+    # (e.g. wrong source dataset) and ontology_graph.py:683 returns
+    # an empty ExtractedGraph rather than raising.
+    rows_total = sum(result["rows_materialized"].values())
+    assert rows_total > 0, (
+        "Expected at least 1 row materialized after skip-flag run, "
+        f"got rows_materialized={result['rows_materialized']!r}. "
+        "Extraction may have silently returned an empty graph."
+    )
 
-    # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran in
-    # the post-timestamp window.
+    # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran for
+    # *this scratch dataset's graph* in the post-timestamp window.
+    # Filter by both the DDL keyword and the fully-qualified graph
+    # reference so the test does not false-fail on an unrelated
+    # CREATE OR REPLACE PROPERTY GRAPH issued by another developer
+    # or test running concurrently in the same project.
+    expected_graph_ref = f"{_PROJECT}.{scratch_dataset}.{spec.name}"
     region_qual = f"`region-{_LOCATION.lower()}`"
     jobs_query = f"""
     SELECT job_id, query, creation_time
     FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT
     WHERE creation_time > @before
       AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%'
+      AND query LIKE @graph_ref_pattern
     """
     job = client.query(
         jobs_query,
@@ -427,6 +452,11 @@ def test_skip_property_graph_issues_no_create_graph_job(
                 bigquery.ScalarQueryParameter(
                     "before", "TIMESTAMP", before_skip_build_ts
                 ),
+                bigquery.ScalarQueryParameter(
+                    "graph_ref_pattern",
+                    "STRING",
+                    f"%{expected_graph_ref}%",
+                ),
             ]
         ),
     )

From 5e53b61446fa80eaf1a33e172888361967bda022 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 20:09:50 -0700
Subject: [PATCH 4/6] test+docs: harden DDL-detection filter, soften DDL claims
 (#104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Addresses three review findings on PR #108:

(1) Live test DDL-detection blind spot
The previous filter required the regressed CREATE OR REPLACE
PROPERTY GRAPH to target _PROJECT.<scratch_dataset>.<spec.name>.
But if skip_property_graph regressed, the compiler would actually
target _PROJECT._DATASET.<spec.name> (the orchestrator's
dataset_id argument is _DATASET in this test, used for extraction
of agent_events). The blind spot: a regression could fire DDL
that the test would not catch.

Fixed by replacing the fully-qualified-graph-ref filter with two
narrower constraints that catch the regression in either dataset:
  - graph name (spec.name) — present in the DDL string regardless
    of which dataset the compiler targets
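  - sdk_feature='ontology-gql' label — only SDK-issued
    property-graph jobs carry this label per
    ontology_property_graph.py:465, so user-authored raw SQL DDL
    jobs (which lack SDK labels) do not trip the assertion. Note
    that the test's setup CREATE PROPERTY GRAPH also carries this
    label, because it goes through
    OntologyPropertyGraphCompiler.create_property_graph(); it is
    excluded by the post-setup timestamp filter, not by the label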

(2) docs/ontology/ontology-build.md: document graph_ref limitation
Added a "Known limitation" section noting that
result["graph_ref"] reports the extraction dataset, not the
binding's target dataset, in split source/target setups. The
materialized base tables themselves still go to the binding's
target dataset per the resolved spec; only the reported string is
affected.

(3) docs/ontology/ontology-build.md: soften DDL-options wording
"additional indexes, dialect-specific options" was overreaching for
BigQuery property graphs; tightened to "custom labels or other
DDL details the SDK's compiler doesn't generate."

136/136 tests pass.
---
 docs/ontology/ontology-build.md            |  6 +++-
 tests/test_integration_ontology_binding.py | 40 ++++++++++++++++------
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/docs/ontology/ontology-build.md b/docs/ontology/ontology-build.md
index b706735..ab72da2 100644
--- a/docs/ontology/ontology-build.md
+++ b/docs/ontology/ontology-build.md
@@ -56,7 +56,7 @@ Without `--skip-property-graph`, the existing exit-1 behavior on graph-create fa
 ## When to use this
 
 - **You already manage `CREATE PROPERTY GRAPH` in Terraform / dbt / a SQL file.** The SDK's `CREATE OR REPLACE PROPERTY GRAPH` would clobber your DDL on every run.
-- **Your property graph definition uses options the SDK doesn't generate.** You hand-authored the graph DDL to express features (custom labels, additional indexes, dialect-specific options) the SDK's compiler doesn't emit.
+- **Your property graph definition uses DDL details the SDK compiler doesn't emit.** You hand-authored the graph DDL to express custom labels or other DDL details the SDK's compiler doesn't generate.
 - **You want to populate your tables on a different cadence than you redefine the graph.** The graph definition rarely changes; the data is refreshed continuously.
 
 For all other cases, leave the flag off and let the SDK manage the property graph end-to-end.
@@ -82,3 +82,7 @@ assert result["property_graph_created"] is False
 ```
 
 `skipped_reason` is only present when the phase was skipped; it is omitted when phase 5 ran (whether or not it succeeded).
+
+## Known limitation: `result["graph_ref"]` in split source/target setups
+
+`build_ontology_graph(...)` accepts a single `dataset_id` and uses it both for extraction (where `agent_events` lives) and for the `graph_ref` reported in the result dict (`{project_id}.{dataset_id}.{name}`). When `--skip-property-graph` is set and the caller's actual property graph lives in `binding.target.dataset` (different from the `dataset_id` used for extraction), `result["graph_ref"]` reports the **extraction dataset**, not the user-owned graph's dataset. The materialized base tables themselves still go to `binding.target.dataset` per the resolved spec — this only affects the reported `graph_ref` string. Tracked as a follow-up; not blocking for `--skip-property-graph` itself since the user already knows where their authored graph lives.
diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py
index 79a28b6..5bbaae3 100644
--- a/tests/test_integration_ontology_binding.py
+++ b/tests/test_integration_ontology_binding.py
@@ -431,19 +431,39 @@ def test_skip_property_graph_issues_no_create_graph_job(
     )
 
     # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran for
-    # *this scratch dataset's graph* in the post-timestamp window.
-    # Filter by both the DDL keyword and the fully-qualified graph
-    # reference so the test does not false-fail on an unrelated
-    # CREATE OR REPLACE PROPERTY GRAPH issued by another developer
-    # or test running concurrently in the same project.
-    expected_graph_ref = f"{_PROJECT}.{scratch_dataset}.{spec.name}"
+    # *this test's graph* in the post-timestamp window.
+    #
+    # Filter design:
+    #   1. timestamp > the post-DDL baseline (closes the trap from
+    #      #107 cell 1.3 where the user's own setup CREATE PROPERTY
+    #      GRAPH would otherwise be caught).
+    #   2. DDL keyword.
+    #   3. graph name (spec.name) — the graph name is in the DDL
+    #      string regardless of which dataset the compiler would
+    #      target. If skip_property_graph regresses, the compiler
+    #      runs with dataset_id=_DATASET (the orchestrator's
+    #      argument), so the regressed DDL would target
+    #      _PROJECT._DATASET.<spec.name>, NOT
+    #      _PROJECT.<scratch_dataset>.<spec.name>. Filtering on the
+    #      graph name (rather than the fully-qualified ref) catches
+    #      the regression in either dataset.
+    #   4. sdk_feature='ontology-gql' label — only SDK-issued
+    #      property-graph jobs carry this label
+    #      (ontology_property_graph.py:465), so unrelated user-
+    #      authored CREATE PROPERTY GRAPH DDLs (including the test's
+    #      own setup job in step 1, which was not labeled this way)
+    #      do not trip the assertion.
     region_qual = f"`region-{_LOCATION.lower()}`"
     jobs_query = f"""
     SELECT job_id, query, creation_time
-    FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+    FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT AS j
     WHERE creation_time > @before
       AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%'
-      AND query LIKE @graph_ref_pattern
+      AND query LIKE @graph_name_pattern
+      AND EXISTS (
+        SELECT 1 FROM UNNEST(j.labels) AS l
+        WHERE l.key = 'sdk_feature' AND l.value = 'ontology-gql'
+      )
     """
     job = client.query(
         jobs_query,
@@ -453,9 +473,9 @@ def test_skip_property_graph_issues_no_create_graph_job(
                     "before", "TIMESTAMP", before_skip_build_ts
                 ),
                 bigquery.ScalarQueryParameter(
-                    "graph_ref_pattern",
+                    "graph_name_pattern",
                     "STRING",
-                    f"%{expected_graph_ref}%",
+                    f"%{spec.name}%",
                 ),
             ]
         ),

From e44967bc132df5ecb1b145920ffc991227aff4d4 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 20:12:18 -0700
Subject: [PATCH 5/6] test: correct comment on label-filter rationale (#104)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous comment claimed the test's setup CREATE PROPERTY GRAPH
job did not carry the sdk_feature='ontology-gql' label. That was
factually wrong: setup goes through
OntologyPropertyGraphCompiler.create_property_graph() (line 387),
which does carry the label.

The test logic was already correct — the setup job is excluded by
the post-setup timestamp captured in step 2, not by the label
filter. The label filter excludes user-authored raw SQL DDL jobs
(without SDK labels), which is its actual purpose. Only the comment
needed to change.

No code change.
---
 tests/test_integration_ontology_binding.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py
index 5bbaae3..8c44e35 100644
--- a/tests/test_integration_ontology_binding.py
+++ b/tests/test_integration_ontology_binding.py
@@ -449,10 +449,12 @@ def test_skip_property_graph_issues_no_create_graph_job(
     #      the regression in either dataset.
     #   4. sdk_feature='ontology-gql' label — only SDK-issued
     #      property-graph jobs carry this label
-    #      (ontology_property_graph.py:465), so unrelated user-
-    #      authored CREATE PROPERTY GRAPH DDLs (including the test's
-    #      own setup job in step 1, which was not labeled this way)
-    #      do not trip the assertion.
+    #      (ontology_property_graph.py:465). The setup CREATE PROPERTY
+    #      GRAPH job in step 1 *also* uses this label (it goes through
+    #      OntologyPropertyGraphCompiler.create_property_graph()), but
+    #      it is excluded by the post-setup timestamp captured in
+    #      step 2. User-authored raw SQL DDL jobs without SDK labels
+    #      are excluded by this label filter.
     region_qual = f"`region-{_LOCATION.lower()}`"
     jobs_query = f"""
     SELECT job_id, query, creation_time

From fcd7d9ce6a385234a447f71d088d5da2a31c1741 Mon Sep 17 00:00:00 2001
From: Haiyuan Cao <raincoatrun@gmail.com>
Date: Sat, 2 May 2026 23:25:18 -0700
Subject: [PATCH 6/6] style: apply autoformat to test files

Run bash autoformat.sh (isort + pyink). Fixes the Format check
CI job that was failing on PR #108.

No behavior change.
---
 tests/test_cli.py                          |  4 +---
 tests/test_integration_ontology_binding.py | 20 +++++---------------
 2 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4564b99..2145575 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -2548,9 +2548,7 @@ def test_default_invocation_omits_skip_flag(self, mock_build):
     assert kwargs["skip_property_graph"] is False
 
   @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph")
-  def test_skip_property_graph_status_visible_in_text_format(
-      self, mock_build
-  ):
+  def test_skip_property_graph_status_visible_in_text_format(self, mock_build):
     """--format=text exposes property_graph_status to non-JSON consumers.
 
     Pins the contract that property_graph_status is not JSON-only:
diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py
index 8c44e35..323e7b1 100644
--- a/tests/test_integration_ontology_binding.py
+++ b/tests/test_integration_ontology_binding.py
@@ -351,18 +351,10 @@ def test_skip_property_graph_issues_no_create_graph_job(
   ):
     from google.cloud import bigquery
 
-    from bigquery_agent_analytics.ontology_materializer import (
-        OntologyMaterializer,
-    )
-    from bigquery_agent_analytics.ontology_orchestrator import (
-        build_ontology_graph,
-    )
-    from bigquery_agent_analytics.ontology_orchestrator import (
-        compile_showcase_gql,
-    )
-    from bigquery_agent_analytics.ontology_property_graph import (
-        OntologyPropertyGraphCompiler,
-    )
+    from bigquery_agent_analytics.ontology_materializer import OntologyMaterializer
+    from bigquery_agent_analytics.ontology_orchestrator import build_ontology_graph
+    from bigquery_agent_analytics.ontology_orchestrator import compile_showcase_gql
+    from bigquery_agent_analytics.ontology_property_graph import OntologyPropertyGraphCompiler
     from bigquery_agent_analytics.resolved_spec import resolve
 
     ontology, binding = ontology_and_binding
@@ -499,9 +491,7 @@ def test_skip_property_graph_issues_no_create_graph_job(
         gql,
         job_config=bigquery.QueryJobConfig(
             query_parameters=[
-                bigquery.ScalarQueryParameter(
-                    "session_id", "STRING", _SESSION
-                ),
+                bigquery.ScalarQueryParameter("session_id", "STRING", _SESSION),
                 bigquery.ScalarQueryParameter("result_limit", "INT64", 50),
             ]
         ),