From b5ea28dacc065f6daf1b4976b27a6fc90913da09 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 18:12:39 -0700 Subject: [PATCH 1/6] feat(ontology): add --skip-property-graph for user-owned graph DDL (#104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets users with their own CREATE PROPERTY GRAPH DDL — managed by Terraform, dbt, or hand-authored — populate base tables from BQ AA traces without overwriting the graph object on every run. Changes - ontology_orchestrator.build_ontology_graph gains skip_property_graph: bool = False. When True, phase 5 is not invoked: no OntologyPropertyGraphCompiler is constructed, no CREATE OR REPLACE PROPERTY GRAPH runs. - Result dict gains property_graph_status with values "created" / "failed" / "skipped:user_requested", plus skipped_reason ("user_requested") when phase 5 was skipped. - ontology-build CLI gains --skip-property-graph and threads property_graph_status through to the curated output dict so JSON consumers can distinguish "skipped" from "failed" without parsing stderr. - Exit handling: skipped_reason == "user_requested" exits 0 silently; the existing exit-1-with-error behavior is preserved for actual graph-creation failures. Tests - test_skip_property_graph_does_not_construct_compiler asserts the compiler class is never called (mock.assert_not_called) when the flag is set. - test_property_graph_status_created_on_success and test_property_graph_status_failed_on_compiler_false cover the two default-mode status values. - CLI tests cover exit 0 with status="skipped:user_requested", default skip_property_graph=False threading, and exit 1 with status="failed" on actual creation failure. 135/135 tests in test_ontology_orchestrator.py + test_cli.py pass. 
--- src/bigquery_agent_analytics/cli.py | 21 ++++ .../ontology_orchestrator.py | 52 ++++++--- tests/test_cli.py | 108 ++++++++++++++++++ tests/test_ontology_orchestrator.py | 104 +++++++++++++++++ 4 files changed, 272 insertions(+), 13 deletions(-) diff --git a/src/bigquery_agent_analytics/cli.py b/src/bigquery_agent_analytics/cli.py index 8e1ac4d..d02a4aa 100644 --- a/src/bigquery_agent_analytics/cli.py +++ b/src/bigquery_agent_analytics/cli.py @@ -1238,6 +1238,16 @@ def ontology_build( no_ai_generate: bool = typer.Option( False, help="Skip AI.GENERATE; fetch raw payloads instead." ), + skip_property_graph: bool = typer.Option( + False, + "--skip-property-graph", + help=( + "Skip CREATE OR REPLACE PROPERTY GRAPH. Use when the caller " + "owns their own property-graph DDL and only wants the SDK to " + "populate base tables. CLI exits 0 with " + "property_graph_status='skipped:user_requested'." + ), + ), fmt: str = typer.Option( "json", "--format", @@ -1261,6 +1271,7 @@ def ontology_build( table_id=table_id, endpoint=endpoint, use_ai_generate=not no_ai_generate, + skip_property_graph=skip_property_graph, ) output = { @@ -1271,9 +1282,19 @@ def ontology_build( "tables_created": result["tables_created"], "rows_materialized": result["rows_materialized"], "property_graph_created": result["property_graph_created"], + "property_graph_status": result.get( + "property_graph_status", + "created" if result["property_graph_created"] else "failed", + ), } typer.echo(format_output(output, fmt)) + # Distinguish "user-requested skip" (exit 0) from "creation failed" + # (exit 1). Same property_graph_created=False, different operator + # intent — JSON consumers read property_graph_status to tell them + # apart without parsing stderr. + if result.get("skipped_reason") == "user_requested": + return if not result["property_graph_created"]: typer.echo( "Error: Property Graph creation failed. 
" diff --git a/src/bigquery_agent_analytics/ontology_orchestrator.py b/src/bigquery_agent_analytics/ontology_orchestrator.py index cc2f43b..f82e3c0 100644 --- a/src/bigquery_agent_analytics/ontology_orchestrator.py +++ b/src/bigquery_agent_analytics/ontology_orchestrator.py @@ -300,6 +300,7 @@ def build_ontology_graph( endpoint: str = "gemini-2.5-flash", use_ai_generate: bool = True, location: Optional[str] = None, + skip_property_graph: bool = False, ) -> dict[str, Any]: """Run the full ontology graph pipeline end-to-end. @@ -307,7 +308,8 @@ def build_ontology_graph( 2. Extract an ``ExtractedGraph`` from agent telemetry. 3. Create physical tables (if not exists). 4. Materialize extracted nodes/edges into tables. - 5. Create the BigQuery Property Graph. + 5. Create the BigQuery Property Graph (skipped when + ``skip_property_graph=True``). Args: session_ids: Sessions to extract from. @@ -323,10 +325,22 @@ def build_ontology_graph( endpoint: AI.GENERATE model endpoint. use_ai_generate: If True, uses server-side AI extraction. location: BigQuery location. + skip_property_graph: When True, skip phase 5 (do not run + ``CREATE OR REPLACE PROPERTY GRAPH``). Use this when the + caller owns their own property-graph DDL and only wants + the SDK to populate base tables. The result dict reports + ``property_graph_created=False`` with + ``skipped_reason="user_requested"`` and + ``property_graph_status="skipped:user_requested"``, which + callers (and the CLI) use to distinguish a deliberate + skip from a creation failure. Returns: A dict with keys: ``spec``, ``graph``, ``tables_created``, ``rows_materialized``, ``property_graph_created``, + ``property_graph_status`` (one of ``"created"``, ``"failed"``, + ``"skipped:user_requested"``), ``skipped_reason`` (only set + when phase 5 was skipped, e.g. ``"user_requested"``), ``graph_name``, ``graph_ref``. 
""" from .ontology_graph import OntologyGraphManager @@ -391,24 +405,36 @@ def build_ontology_graph( rows_materialized = materializer.materialize(graph, session_ids) logger.info("Rows materialized: %s", rows_materialized) - # 5. Create property graph. - compiler = OntologyPropertyGraphCompiler( - project_id=project_id, - dataset_id=dataset_id, - spec=spec, - location=location, - ) - pg_created = compiler.create_property_graph(graph_name=name) - graph_ref = f"{project_id}.{dataset_id}.{name}" - logger.info("Property Graph %r created=%s.", graph_ref, pg_created) - return { + # 5. Create property graph (or skip when caller owns the DDL). + result: dict[str, Any] = { "spec": spec, "graph": graph, "tables_created": tables_created, "rows_materialized": rows_materialized, - "property_graph_created": pg_created, "graph_name": name, "graph_ref": graph_ref, } + if skip_property_graph: + logger.info( + "Property Graph creation skipped (skip_property_graph=True); " + "caller owns the DDL for graph %r.", + graph_ref, + ) + result["property_graph_created"] = False + result["skipped_reason"] = "user_requested" + result["property_graph_status"] = "skipped:user_requested" + else: + compiler = OntologyPropertyGraphCompiler( + project_id=project_id, + dataset_id=dataset_id, + spec=spec, + location=location, + ) + pg_created = compiler.create_property_graph(graph_name=name) + logger.info("Property Graph %r created=%s.", graph_ref, pg_created) + result["property_graph_created"] = pg_created + result["property_graph_status"] = "created" if pg_created else "failed" + + return result diff --git a/tests/test_cli.py b/tests/test_cli.py index 15362dd..a94e599 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2472,3 +2472,111 @@ def test_bad_spec_path_exit_2(self): ], ) assert result.exit_code == 2 + + @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") + def test_skip_property_graph_exits_zero_with_status(self, mock_build): + """--skip-property-graph: exit 
0, status='skipped:user_requested'.""" + from bigquery_agent_analytics.ontology_models import ExtractedGraph + + mock_build.return_value = { + "graph_name": "g", + "graph_ref": "proj.ds.g", + "graph": ExtractedGraph(name="test"), + "tables_created": {"mako_DecisionPoint": "p.d.decision_points"}, + "rows_materialized": {"mako_DecisionPoint": 2}, + "property_graph_created": False, + "skipped_reason": "user_requested", + "property_graph_status": "skipped:user_requested", + "spec": MagicMock(), + } + + result = runner.invoke( + app, + [ + "ontology-build", + "--project-id=proj", + "--dataset-id=ds", + f"--spec-path={self._SPEC_PATH}", + "--session-ids=sess1", + "--env=p.d", + "--skip-property-graph", + ], + ) + assert result.exit_code == 0 + # Skip path must NOT print the "Property Graph creation failed" stderr. + assert "Property Graph creation failed" not in result.output + parsed = json.loads(result.output) + assert parsed["property_graph_created"] is False + assert parsed["property_graph_status"] == "skipped:user_requested" + + # Flag is threaded through to the orchestrator. 
+ _, kwargs = mock_build.call_args + assert kwargs["skip_property_graph"] is True + + @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") + def test_default_invocation_omits_skip_flag(self, mock_build): + """Default invocation passes skip_property_graph=False.""" + from bigquery_agent_analytics.ontology_models import ExtractedGraph + + mock_build.return_value = { + "graph_name": "g", + "graph_ref": "proj.ds.g", + "graph": ExtractedGraph(name="test"), + "tables_created": {}, + "rows_materialized": {}, + "property_graph_created": True, + "property_graph_status": "created", + "spec": MagicMock(), + } + + result = runner.invoke( + app, + [ + "ontology-build", + "--project-id=proj", + "--dataset-id=ds", + f"--spec-path={self._SPEC_PATH}", + "--session-ids=sess1", + "--env=p.d", + ], + ) + assert result.exit_code == 0 + parsed = json.loads(result.output) + assert parsed["property_graph_status"] == "created" + + _, kwargs = mock_build.call_args + assert kwargs["skip_property_graph"] is False + + @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") + def test_property_graph_failure_status_failed(self, mock_build): + """When the orchestrator reports failure, exit 1 with status='failed'. + + Distinguishes the failure path from the user-requested-skip path by + asserting the status field, not just the exit code. 
+ """ + from bigquery_agent_analytics.ontology_models import ExtractedGraph + + mock_build.return_value = { + "graph_name": "g", + "graph_ref": "proj.ds.g", + "graph": ExtractedGraph(name="test"), + "tables_created": {}, + "rows_materialized": {}, + "property_graph_created": False, + "property_graph_status": "failed", + "spec": MagicMock(), + } + + result = runner.invoke( + app, + [ + "ontology-build", + "--project-id=proj", + "--dataset-id=ds", + f"--spec-path={self._SPEC_PATH}", + "--session-ids=sess1", + "--env=p.d", + ], + ) + assert result.exit_code == 1 + assert "Property Graph creation failed" in result.output diff --git a/tests/test_ontology_orchestrator.py b/tests/test_ontology_orchestrator.py index 11800c1..f677a20 100644 --- a/tests/test_ontology_orchestrator.py +++ b/tests/test_ontology_orchestrator.py @@ -469,3 +469,107 @@ def test_partial_table_creation_raises( # Materialize and property graph should NOT have been called. mock_mat_cls.return_value.materialize.assert_not_called() mock_pg_cls.return_value.create_property_graph.assert_not_called() + + @patch( + "bigquery_agent_analytics.ontology_property_graph" + ".OntologyPropertyGraphCompiler" + ) + @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer") + @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager") + def test_skip_property_graph_does_not_construct_compiler( + self, mock_mgr_cls, mock_mat_cls, mock_pg_cls + ): + """When skip_property_graph=True, the compiler is never constructed.""" + mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph( + name="test" + ) + mock_mat_cls.return_value.create_tables.return_value = dict( + _ALL_YMGO_TABLES + ) + mock_mat_cls.return_value.materialize.return_value = {} + + result = build_ontology_graph( + session_ids=["sess1"], + spec_path=_DEMO_SPEC_PATH, + project_id="proj", + dataset_id="ds", + env="p.d", + skip_property_graph=True, + ) + + # Compiler must not be constructed and create_property_graph must + 
# not be called when skip_property_graph=True. + mock_pg_cls.assert_not_called() + mock_pg_cls.return_value.create_property_graph.assert_not_called() + + # Tables and rows still produced. + mock_mat_cls.return_value.create_tables.assert_called_once() + mock_mat_cls.return_value.materialize.assert_called_once() + + # Result reports the skip distinctly from a creation failure. + assert result["property_graph_created"] is False + assert result["skipped_reason"] == "user_requested" + assert result["property_graph_status"] == "skipped:user_requested" + + @patch( + "bigquery_agent_analytics.ontology_property_graph" + ".OntologyPropertyGraphCompiler" + ) + @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer") + @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager") + def test_property_graph_status_created_on_success( + self, mock_mgr_cls, mock_mat_cls, mock_pg_cls + ): + """Default flow with successful graph creation reports 'created'.""" + mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph( + name="test" + ) + mock_mat_cls.return_value.create_tables.return_value = dict( + _ALL_YMGO_TABLES + ) + mock_mat_cls.return_value.materialize.return_value = {} + mock_pg_cls.return_value.create_property_graph.return_value = True + + result = build_ontology_graph( + session_ids=["sess1"], + spec_path=_DEMO_SPEC_PATH, + project_id="proj", + dataset_id="ds", + env="p.d", + ) + + assert result["property_graph_created"] is True + assert result["property_graph_status"] == "created" + assert "skipped_reason" not in result + + @patch( + "bigquery_agent_analytics.ontology_property_graph" + ".OntologyPropertyGraphCompiler" + ) + @patch("bigquery_agent_analytics.ontology_materializer.OntologyMaterializer") + @patch("bigquery_agent_analytics.ontology_graph.OntologyGraphManager") + def test_property_graph_status_failed_on_compiler_false( + self, mock_mgr_cls, mock_mat_cls, mock_pg_cls + ): + """Default flow where create_property_graph 
returns False reports + 'failed' (distinct from 'skipped:user_requested').""" + mock_mgr_cls.return_value.extract_graph.return_value = ExtractedGraph( + name="test" + ) + mock_mat_cls.return_value.create_tables.return_value = dict( + _ALL_YMGO_TABLES + ) + mock_mat_cls.return_value.materialize.return_value = {} + mock_pg_cls.return_value.create_property_graph.return_value = False + + result = build_ontology_graph( + session_ids=["sess1"], + spec_path=_DEMO_SPEC_PATH, + project_id="proj", + dataset_id="ds", + env="p.d", + ) + + assert result["property_graph_created"] is False + assert result["property_graph_status"] == "failed" + assert "skipped_reason" not in result From 18548ee0339c70db4c67e3b8314e0b6fa5ab4241 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 18:31:20 -0700 Subject: [PATCH 2/6] docs+test: ontology-build doc + live skip-property-graph test (#104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the two #104 acceptance gaps flagged on PR #108 review: (1) Docs missing - New docs/ontology/ontology-build.md documents the bq-agent-sdk ontology-build orchestrator end-to-end and the new --skip-property-graph flag. - Includes a status-field reference table mapping property_graph_status (created / failed / skipped:user_requested) to property_graph_created and CLI exit code. - Includes Python API example showing skip_property_graph=True with expected result-dict shape. (2) No gated live integration test - New TestSkipPropertyGraph class in tests/test_integration_ontology_binding.py. - Gated on RUN_LIVE_BIGQUERY_TESTS=1 like the existing live tests. - Sequence: create authored CREATE PROPERTY GRAPH directly via SQL (simulating Terraform/dbt-managed DDL), capture the post-DDL CURRENT_TIMESTAMP(), run build_ontology_graph(..., skip_property_graph=True), then query JOBS_BY_PROJECT for any 'CREATE OR REPLACE PROPERTY GRAPH' jobs in the post-timestamp window — assert zero. 
Also re-runs the showcase GQL query to confirm the user's graph object still works after the SDK run. - The timestamp is captured AFTER the authored DDL specifically to avoid the false-positive trap called out in #107 cell 1.3. --- docs/ontology/ontology-build.md | 84 +++++++++++++ tests/test_integration_ontology_binding.py | 131 +++++++++++++++++++++ 2 files changed, 215 insertions(+) create mode 100644 docs/ontology/ontology-build.md diff --git a/docs/ontology/ontology-build.md b/docs/ontology/ontology-build.md new file mode 100644 index 0000000..b706735 --- /dev/null +++ b/docs/ontology/ontology-build.md @@ -0,0 +1,84 @@ +# `bq-agent-sdk ontology-build` — End-to-End Orchestrator + +`bq-agent-sdk ontology-build` runs the SDK's full ontology pipeline end-to-end against a populated `agent_events` table: + +1. Load the spec (`--ontology X.yaml --binding Y.yaml`). +2. Extract an `ExtractedGraph` from agent telemetry via `AI.GENERATE`. +3. Create physical entity/relationship tables (`CREATE TABLE IF NOT EXISTS`). +4. Materialize extracted nodes/edges into those tables. +5. Run `CREATE OR REPLACE PROPERTY GRAPH` to wire the BigQuery property graph object. + +The Python entry point is `bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph(...)`. The CLI is a thin wrapper. + +## Skipping property-graph DDL + +Use `--skip-property-graph` when **the caller owns their own `CREATE PROPERTY GRAPH` DDL** — e.g., the property graph is provisioned via Terraform, dbt, or hand-authored SQL — and only wants the SDK to populate base tables. + +``` +bq-agent-sdk ontology-build \ + --project-id my-project \ + --dataset-id my-dataset \ + --ontology my.ontology.yaml \ + --binding my-bq-prod.binding.yaml \ + --session-ids sess-1,sess-2 \ + --skip-property-graph +``` + +Behavior with the flag set: + +- Phase 5 short-circuits. No `OntologyPropertyGraphCompiler` is constructed, no `CREATE OR REPLACE PROPERTY GRAPH` job runs. The user's existing graph object is unchanged. 
+- Phases 1–4 run normally. Tables are created (`CREATE TABLE IF NOT EXISTS` is a no-op against pre-existing tables) and rows are materialized. +- The CLI exits 0. +- The output dict reports: + + ```json + { + "property_graph_created": false, + "property_graph_status": "skipped:user_requested", + ... + } + ``` + + JSON consumers should read `property_graph_status` (not just `property_graph_created`) to distinguish a deliberate skip from a creation failure. + +## Status field reference + +The CLI's `property_graph_status` field has three values: + +| `property_graph_status` | `property_graph_created` | Exit code | Meaning | +|---|---|---|---| +| `"created"` | `true` | 0 | Phase 5 ran and BigQuery confirmed the graph object. | +| `"failed"` | `false` | 1 | Phase 5 ran but the graph object was not created. The CLI prints "Property Graph creation failed" to stderr. Tables and rows were still materialized. | +| `"skipped:user_requested"` | `false` | 0 | `--skip-property-graph` was set. Phase 5 did not run. No error message. | + +Without `--skip-property-graph`, the existing exit-1 behavior on graph-create failure is preserved exactly. + +## When to use this + +- **You already manage `CREATE PROPERTY GRAPH` in Terraform / dbt / a SQL file.** The SDK's `CREATE OR REPLACE PROPERTY GRAPH` would clobber your DDL on every run. +- **Your property graph definition uses options the SDK doesn't generate.** You hand-authored the graph DDL to express features (custom labels, additional indexes, dialect-specific options) the SDK's compiler doesn't emit. +- **You want to populate your tables on a different cadence than you redefine the graph.** The graph definition rarely changes; the data is refreshed continuously. + +For all other cases, leave the flag off and let the SDK manage the property graph end-to-end. 
+ +## Python API + +The flag is also available on `build_ontology_graph(...)`: + +```python +from bigquery_agent_analytics.ontology_orchestrator import build_ontology_graph + +result = build_ontology_graph( + spec=resolved_spec, + session_ids=["sess-1"], + project_id="my-project", + dataset_id="my-dataset", + skip_property_graph=True, # phase 5 skipped +) + +assert result["property_graph_status"] == "skipped:user_requested" +assert result["skipped_reason"] == "user_requested" +assert result["property_graph_created"] is False +``` + +`skipped_reason` is only present when the phase was skipped; it is omitted when phase 5 ran (whether or not it succeeded). diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py index 808cc62..c056234 100644 --- a/tests/test_integration_ontology_binding.py +++ b/tests/test_integration_ontology_binding.py @@ -328,6 +328,137 @@ def test_create_graph_and_query( assert len(rows) > 0, "GQL query returned 0 rows" +class TestSkipPropertyGraph: + """Live test that --skip-property-graph does not run CREATE PROPERTY GRAPH. + + Issue #104 acceptance: "creates a pre-existing property graph, runs + ontology-build --skip-property-graph against pre-existing base tables, + and verifies the user's graph definition is unchanged after the run." + + Verified by: + - Capturing a timestamp after creating the user's CREATE PROPERTY + GRAPH directly (not via the SDK). + - Running build_ontology_graph(..., skip_property_graph=True). + - Querying INFORMATION_SCHEMA.JOBS_BY_PROJECT for any + 'CREATE OR REPLACE PROPERTY GRAPH' jobs in the post-timestamp + window. Asserting zero. + - Asserting the GQL query against the user's graph still works + after the SDK run (graph object intact, base tables refreshed). 
+ """ + + def test_skip_property_graph_issues_no_create_graph_job( + self, ontology_and_binding, lineage_config, scratch_dataset + ): + from google.cloud import bigquery + + from bigquery_agent_analytics.ontology_materializer import ( + OntologyMaterializer, + ) + from bigquery_agent_analytics.ontology_orchestrator import ( + build_ontology_graph, + ) + from bigquery_agent_analytics.ontology_orchestrator import ( + compile_showcase_gql, + ) + from bigquery_agent_analytics.ontology_property_graph import ( + OntologyPropertyGraphCompiler, + ) + from bigquery_agent_analytics.resolved_spec import resolve + + ontology, binding = ontology_and_binding + spec = resolve(ontology, binding, lineage_config=lineage_config) + + # Step 1: create base tables (idempotent), then create the user's + # property graph via direct SQL (simulating Terraform/dbt-managed + # DDL the SDK should NOT touch when --skip-property-graph is set). + mat = OntologyMaterializer.from_ontology_binding( + ontology=ontology, + binding=binding, + lineage_config=lineage_config, + write_mode="batch_load", + ) + mat.create_tables() + + compiler = OntologyPropertyGraphCompiler.from_ontology_binding( + ontology=ontology, + binding=binding, + lineage_config=lineage_config, + ) + assert compiler.create_property_graph() is True + + # Step 2: capture the "before" timestamp AFTER the authored DDL + # has finished so the JOBS_BY_PROJECT filter does not catch our + # own setup job. Bind via a SQL CURRENT_TIMESTAMP() round-trip so + # the timestamp is BQ-aligned. + client = bigquery.Client(project=_PROJECT, location=_LOCATION) + before_ts_row = next( + iter(client.query("SELECT CURRENT_TIMESTAMP() AS ts").result()) + ) + before_skip_build_ts = before_ts_row.ts + + # Step 3: run build_ontology_graph with skip_property_graph=True. 
+ result = build_ontology_graph( + spec=spec, + session_ids=[_SESSION], + project_id=_PROJECT, + dataset_id=scratch_dataset, + graph_name=spec.name, + location=_LOCATION, + skip_property_graph=True, + ) + + assert result["property_graph_created"] is False + assert result["property_graph_status"] == "skipped:user_requested" + assert result["skipped_reason"] == "user_requested" + + # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran in + # the post-timestamp window. + region_qual = f"`region-{_LOCATION.lower()}`" + jobs_query = f""" + SELECT job_id, query, creation_time + FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT + WHERE creation_time > @before + AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%' + """ + job = client.query( + jobs_query, + job_config=bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + "before", "TIMESTAMP", before_skip_build_ts + ), + ] + ), + ) + create_graph_jobs = list(job.result()) + assert len(create_graph_jobs) == 0, ( + "Expected zero CREATE OR REPLACE PROPERTY GRAPH jobs after " + f"build_ontology_graph(skip_property_graph=True), got " + f"{len(create_graph_jobs)}: " + f"{[j.job_id for j in create_graph_jobs]}" + ) + + # Step 5: assert the user's graph object still works. Run the + # showcase GQL query — it should succeed (graph definition is + # intact) even though it may return zero rows if the test + # session_id has no matching edges in this scratch dataset. + gql = compile_showcase_gql(spec, _PROJECT, scratch_dataset) + gql_job = client.query( + gql, + job_config=bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ScalarQueryParameter( + "session_id", "STRING", _SESSION + ), + bigquery.ScalarQueryParameter("result_limit", "INT64", 50), + ] + ), + ) + # Result iteration confirms BigQuery accepted the GQL against + # the user's pre-existing property graph. 
+ list(gql_job.result()) + + class TestLineageEndToEnd: """Live lineage detection + GQL via from_ontology_binding.""" From 548046570a58313ec858886b323bc317e5277a40 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 18:43:05 -0700 Subject: [PATCH 3/6] test+docs: harden live test, add text-format check, link doc (#104) Addresses three review findings on PR #108: (1) Live test now exercises real extraction/materialization - Pass dataset_id=_DATASET, table_id=_TABLE so extraction reads the production agent_events table where YMGO ADCP session data lives. Materializer still writes to scratch_dataset because spec entity sources arrive 3-part-qualified to binding.target.dataset via _qualify_source (resolved_spec.py:141). - Assert sum(rows_materialized.values()) > 0 to catch the silent- empty-graph trap where ontology_graph.py:683 returns an empty ExtractedGraph if extraction fails (e.g. wrong source dataset). (2) JOBS_BY_PROJECT assertion narrowed to the test's own graph - Filter by both 'CREATE OR REPLACE PROPERTY GRAPH' keyword AND the fully-qualified graph reference ({_PROJECT}.{scratch_dataset}.{spec.name}). Prevents false-fail on unrelated CREATE OR REPLACE PROPERTY GRAPH jobs running concurrently in the same project from other tests/developers. (3) docs/README.md gains a row for the new ontology-build doc. (4) New CLI test test_skip_property_graph_status_visible_in_text_format asserts property_graph_status appears in --format=text output, pinning the contract that the status field is not JSON-only. 7/7 ontology-build CLI tests pass. --- docs/README.md | 1 + tests/test_cli.py | 42 ++++++++++++++++++++++ tests/test_integration_ontology_binding.py | 36 +++++++++++++++++-- 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/docs/README.md b/docs/README.md index f8bbac0..dcb9d3c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -36,6 +36,7 @@ architecture, rationale, and implementation plans behind key SDK features. 
| [ontology/compilation.md](ontology/compilation.md) | Compilation — resolving ontology + binding into backend DDL | | [ontology/cli.md](ontology/cli.md) | CLI design for the `gm` tool (validate, compile, import-owl) | | [ontology/owl-import.md](ontology/owl-import.md) | OWL import — converting OWL ontologies to YAML format | +| [ontology/ontology-build.md](ontology/ontology-build.md) | `bq-agent-sdk ontology-build` orchestrator + `--skip-property-graph` reference | ## Deployment Surfaces diff --git a/tests/test_cli.py b/tests/test_cli.py index a94e599..4564b99 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2547,6 +2547,48 @@ def test_default_invocation_omits_skip_flag(self, mock_build): _, kwargs = mock_build.call_args assert kwargs["skip_property_graph"] is False + @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") + def test_skip_property_graph_status_visible_in_text_format( + self, mock_build + ): + """--format=text exposes property_graph_status to non-JSON consumers. + + Pins the contract that property_graph_status is not JSON-only: + --format=table renders dict keys; --format=text falls back to a + readable representation. The status string must appear in either. 
+ """ + from bigquery_agent_analytics.ontology_models import ExtractedGraph + + mock_build.return_value = { + "graph_name": "g", + "graph_ref": "proj.ds.g", + "graph": ExtractedGraph(name="test"), + "tables_created": {}, + "rows_materialized": {}, + "property_graph_created": False, + "skipped_reason": "user_requested", + "property_graph_status": "skipped:user_requested", + "spec": MagicMock(), + } + + result = runner.invoke( + app, + [ + "ontology-build", + "--project-id=proj", + "--dataset-id=ds", + f"--spec-path={self._SPEC_PATH}", + "--session-ids=sess1", + "--env=p.d", + "--skip-property-graph", + "--format=text", + ], + ) + assert result.exit_code == 0 + # The status string must appear in the text-format output so non- + # JSON consumers can see why the graph was not created. + assert "skipped:user_requested" in result.output + @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") def test_property_graph_failure_status_failed(self, mock_build): """When the orchestrator reports failure, exit 1 with status='failed'. diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py index c056234..79a28b6 100644 --- a/tests/test_integration_ontology_binding.py +++ b/tests/test_integration_ontology_binding.py @@ -397,11 +397,20 @@ def test_skip_property_graph_issues_no_create_graph_job( before_skip_build_ts = before_ts_row.ts # Step 3: run build_ontology_graph with skip_property_graph=True. + # Extraction reads from the real _DATASET.agent_events table where + # the YMGO ADCP session data lives. Materialization writes to + # scratch_dataset because spec entity sources are already + # 3-part-qualified to binding.target.dataset = scratch_dataset + # (see _qualify_source at resolved_spec.py:141), so the + # materializer ignores its dataset_id parameter for output table + # location. The result: extract from prod-like, materialize to + # scratch — exactly the user-facing flow the test should exercise. 
result = build_ontology_graph( spec=spec, session_ids=[_SESSION], project_id=_PROJECT, - dataset_id=scratch_dataset, + dataset_id=_DATASET, + table_id=_TABLE, graph_name=spec.name, location=_LOCATION, skip_property_graph=True, @@ -410,15 +419,31 @@ def test_skip_property_graph_issues_no_create_graph_job( assert result["property_graph_created"] is False assert result["property_graph_status"] == "skipped:user_requested" assert result["skipped_reason"] == "user_requested" + # Phases 1-4 must have actually populated the scratch tables. + # Catches the silent-empty-graph trap where extraction can fail + # (e.g. wrong source dataset) and ontology_graph.py:683 returns + # an empty ExtractedGraph rather than raising. + rows_total = sum(result["rows_materialized"].values()) + assert rows_total > 0, ( + "Expected at least 1 row materialized after skip-flag run, " + f"got rows_materialized={result['rows_materialized']!r}. " + "Extraction may have silently returned an empty graph." + ) - # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran in - # the post-timestamp window. + # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran for + # *this scratch dataset's graph* in the post-timestamp window. + # Filter by both the DDL keyword and the fully-qualified graph + # reference so the test does not false-fail on an unrelated + # CREATE OR REPLACE PROPERTY GRAPH issued by another developer + # or test running concurrently in the same project. 
+ expected_graph_ref = f"{_PROJECT}.{scratch_dataset}.{spec.name}" region_qual = f"`region-{_LOCATION.lower()}`" jobs_query = f""" SELECT job_id, query, creation_time FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT WHERE creation_time > @before AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%' + AND query LIKE @graph_ref_pattern """ job = client.query( jobs_query, @@ -427,6 +452,11 @@ def test_skip_property_graph_issues_no_create_graph_job( bigquery.ScalarQueryParameter( "before", "TIMESTAMP", before_skip_build_ts ), + bigquery.ScalarQueryParameter( + "graph_ref_pattern", + "STRING", + f"%{expected_graph_ref}%", + ), ] ), ) From 5e53b61446fa80eaf1a33e172888361967bda022 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 20:09:50 -0700 Subject: [PATCH 4/6] test+docs: harden DDL-detection filter, soften DDL claims (#104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses three review findings on PR #108: (1) Live test DDL-detection blind spot The previous filter required the regressed CREATE OR REPLACE PROPERTY GRAPH to target _PROJECT.{scratch_dataset}.{spec.name}. But if skip_property_graph regressed, the compiler would actually target _PROJECT._DATASET.{spec.name} (the orchestrator's dataset_id argument is _DATASET in this test, used for extraction of agent_events). The blind spot: a regression could fire DDL that the test would not catch. 
Fixed by replacing the fully-qualified-graph-ref filter with two narrower constraints that catch the regression in either dataset: - graph name (spec.name) — present in the DDL string regardless of which dataset the compiler targets - sdk_feature='ontology-gql' label — only SDK-issued property-graph jobs carry this label per ontology_property_graph.py:465; the test's setup CREATE PROPERTY GRAPH (issued via direct SQL) does not, so it does not trip the assertion (2) docs/ontology/ontology-build.md: document graph_ref limitation Added a "Known limitation" section noting that result["graph_ref"] reports the extraction dataset, not the binding's target dataset, in split source/target setups. The materialized base tables themselves still go to the binding's target dataset per the resolved spec; only the reported string is affected. (3) docs/ontology/ontology-build.md: soften DDL-options wording "additional indexes, dialect-specific options" was overreaching for BigQuery property graphs; tightened to "custom labels or other DDL details the SDK's compiler doesn't generate." 136/136 tests pass. --- docs/ontology/ontology-build.md | 6 +++- tests/test_integration_ontology_binding.py | 40 ++++++++++++++++------ 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/docs/ontology/ontology-build.md b/docs/ontology/ontology-build.md index b706735..ab72da2 100644 --- a/docs/ontology/ontology-build.md +++ b/docs/ontology/ontology-build.md @@ -56,7 +56,7 @@ Without `--skip-property-graph`, the existing exit-1 behavior on graph-create fa ## When to use this - **You already manage `CREATE PROPERTY GRAPH` in Terraform / dbt / a SQL file.** The SDK's `CREATE OR REPLACE PROPERTY GRAPH` would clobber your DDL on every run. -- **Your property graph definition uses options the SDK doesn't generate.** You hand-authored the graph DDL to express features (custom labels, additional indexes, dialect-specific options) the SDK's compiler doesn't emit. 
+- **Your property graph definition uses DDL details the SDK compiler doesn't emit.** You hand-authored the graph DDL to express custom labels or other DDL details the SDK's compiler doesn't generate. - **You want to populate your tables on a different cadence than you redefine the graph.** The graph definition rarely changes; the data is refreshed continuously. For all other cases, leave the flag off and let the SDK manage the property graph end-to-end. @@ -82,3 +82,7 @@ assert result["property_graph_created"] is False ``` `skipped_reason` is only present when the phase was skipped; it is omitted when phase 5 ran (whether or not it succeeded). + +## Known limitation: `result["graph_ref"]` in split source/target setups + +`build_ontology_graph(...)` accepts a single `dataset_id` and uses it both for extraction (where `agent_events` lives) and for the `graph_ref` reported in the result dict (`{project_id}.{dataset_id}.{name}`). When `--skip-property-graph` is set and the caller's actual property graph lives in `binding.target.dataset` (different from the `dataset_id` used for extraction), `result["graph_ref"]` reports the **extraction dataset**, not the user-owned graph's dataset. The materialized base tables themselves still go to `binding.target.dataset` per the resolved spec — this only affects the reported `graph_ref` string. Tracked as a follow-up; not blocking for `--skip-property-graph` itself since the user already knows where their authored graph lives. diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py index 79a28b6..5bbaae3 100644 --- a/tests/test_integration_ontology_binding.py +++ b/tests/test_integration_ontology_binding.py @@ -431,19 +431,39 @@ def test_skip_property_graph_issues_no_create_graph_job( ) # Step 4: assert no CREATE OR REPLACE PROPERTY GRAPH job ran for - # *this scratch dataset's graph* in the post-timestamp window. 
- # Filter by both the DDL keyword and the fully-qualified graph - # reference so the test does not false-fail on an unrelated - # CREATE OR REPLACE PROPERTY GRAPH issued by another developer - # or test running concurrently in the same project. - expected_graph_ref = f"{_PROJECT}.{scratch_dataset}.{spec.name}" + # *this test's graph* in the post-timestamp window. + # + # Filter design: + # 1. timestamp > the post-DDL baseline (closes the trap from + # #107 cell 1.3 where the user's own setup CREATE PROPERTY + # GRAPH would otherwise be caught). + # 2. DDL keyword. + # 3. graph name (spec.name) — the graph name is in the DDL + # string regardless of which dataset the compiler would + # target. If skip_property_graph regresses, the compiler + # runs with dataset_id=_DATASET (the orchestrator's + # argument), so the regressed DDL would target + # _PROJECT._DATASET.{spec.name}, NOT + # _PROJECT.{scratch_dataset}.{spec.name}. Filtering on the + # graph name (rather than the fully-qualified ref) catches + # the regression in either dataset. + # 4. sdk_feature='ontology-gql' label — only SDK-issued + # property-graph jobs carry this label + # (ontology_property_graph.py:465), so unrelated user- + # authored CREATE PROPERTY GRAPH DDLs (including the test's + # own setup job in step 1, which was not labeled this way) + # do not trip the assertion. 
region_qual = f"`region-{_LOCATION.lower()}`" jobs_query = f""" SELECT job_id, query, creation_time - FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT + FROM {region_qual}.INFORMATION_SCHEMA.JOBS_BY_PROJECT AS j WHERE creation_time > @before AND UPPER(query) LIKE '%CREATE OR REPLACE PROPERTY GRAPH%' - AND query LIKE @graph_ref_pattern + AND query LIKE @graph_name_pattern + AND EXISTS ( + SELECT 1 FROM UNNEST(j.labels) AS l + WHERE l.key = 'sdk_feature' AND l.value = 'ontology-gql' + ) """ job = client.query( jobs_query, @@ -453,9 +473,9 @@ def test_skip_property_graph_issues_no_create_graph_job( "before", "TIMESTAMP", before_skip_build_ts ), bigquery.ScalarQueryParameter( - "graph_ref_pattern", + "graph_name_pattern", "STRING", - f"%{expected_graph_ref}%", + f"%{spec.name}%", ), ] ), From e44967bc132df5ecb1b145920ffc991227aff4d4 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 20:12:18 -0700 Subject: [PATCH 5/6] test: correct comment on label-filter rationale (#104) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous comment claimed the test's setup CREATE PROPERTY GRAPH job did not carry the sdk_feature='ontology-gql' label. That was factually wrong: setup goes through OntologyPropertyGraphCompiler.create_property_graph() (line 387), which does carry the label. The test logic was already correct — the setup job is excluded by the post-setup timestamp captured in step 2, not by the label filter. The label filter excludes user-authored raw SQL DDL jobs (without SDK labels), which is its actual purpose. Only the comment needed to change. No code change. 
--- tests/test_integration_ontology_binding.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py index 5bbaae3..8c44e35 100644 --- a/tests/test_integration_ontology_binding.py +++ b/tests/test_integration_ontology_binding.py @@ -449,10 +449,12 @@ def test_skip_property_graph_issues_no_create_graph_job( # the regression in either dataset. # 4. sdk_feature='ontology-gql' label — only SDK-issued # property-graph jobs carry this label - # (ontology_property_graph.py:465), so unrelated user- - # authored CREATE PROPERTY GRAPH DDLs (including the test's - # own setup job in step 1, which was not labeled this way) - # do not trip the assertion. + # (ontology_property_graph.py:465). The setup CREATE PROPERTY + # GRAPH job in step 1 *also* uses this label (it goes through + # OntologyPropertyGraphCompiler.create_property_graph()), but + # it is excluded by the post-setup timestamp captured in + # step 2. User-authored raw SQL DDL jobs without SDK labels + # are excluded by this label filter. region_qual = f"`region-{_LOCATION.lower()}`" jobs_query = f""" SELECT job_id, query, creation_time From fcd7d9ce6a385234a447f71d088d5da2a31c1741 Mon Sep 17 00:00:00 2001 From: Haiyuan Cao Date: Sat, 2 May 2026 23:25:18 -0700 Subject: [PATCH 6/6] style: apply autoformat to test files Run bash autoformat.sh (isort + pyink). Fixes the Format check CI job that was failing on PR #108. No behavior change. 
--- tests/test_cli.py | 4 +--- tests/test_integration_ontology_binding.py | 20 +++++--------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 4564b99..2145575 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -2548,9 +2548,7 @@ def test_default_invocation_omits_skip_flag(self, mock_build): assert kwargs["skip_property_graph"] is False @patch("bigquery_agent_analytics.ontology_orchestrator.build_ontology_graph") - def test_skip_property_graph_status_visible_in_text_format( - self, mock_build - ): + def test_skip_property_graph_status_visible_in_text_format(self, mock_build): """--format=text exposes property_graph_status to non-JSON consumers. Pins the contract that property_graph_status is not JSON-only: diff --git a/tests/test_integration_ontology_binding.py b/tests/test_integration_ontology_binding.py index 8c44e35..323e7b1 100644 --- a/tests/test_integration_ontology_binding.py +++ b/tests/test_integration_ontology_binding.py @@ -351,18 +351,10 @@ def test_skip_property_graph_issues_no_create_graph_job( ): from google.cloud import bigquery - from bigquery_agent_analytics.ontology_materializer import ( - OntologyMaterializer, - ) - from bigquery_agent_analytics.ontology_orchestrator import ( - build_ontology_graph, - ) - from bigquery_agent_analytics.ontology_orchestrator import ( - compile_showcase_gql, - ) - from bigquery_agent_analytics.ontology_property_graph import ( - OntologyPropertyGraphCompiler, - ) + from bigquery_agent_analytics.ontology_materializer import OntologyMaterializer + from bigquery_agent_analytics.ontology_orchestrator import build_ontology_graph + from bigquery_agent_analytics.ontology_orchestrator import compile_showcase_gql + from bigquery_agent_analytics.ontology_property_graph import OntologyPropertyGraphCompiler from bigquery_agent_analytics.resolved_spec import resolve ontology, binding = ontology_and_binding @@ -499,9 +491,7 @@ def 
test_skip_property_graph_issues_no_create_graph_job( gql, job_config=bigquery.QueryJobConfig( query_parameters=[ - bigquery.ScalarQueryParameter( - "session_id", "STRING", _SESSION - ), + bigquery.ScalarQueryParameter("session_id", "STRING", _SESSION), bigquery.ScalarQueryParameter("result_limit", "INT64", 50), ] ),